[llvm-branch-commits] [llvm] [LoongArch] Broadcast repeated subsequence in build_vector instead of inserting per element (PR #154533)

2025-08-20 Thread via llvm-branch-commits

https://github.com/zhaoqi5 updated 
https://github.com/llvm/llvm-project/pull/154533

>From 3674bad63bffc351ecd099baef91f90b8d1a0866 Mon Sep 17 00:00:00 2001
From: Qi Zhao 
Date: Wed, 20 Aug 2025 20:39:50 +0800
Subject: [PATCH 1/4] [LoongArch] Broadcast repeated subsequence in
 build_vector instead of inserting per element

---
 .../LoongArch/LoongArchISelLowering.cpp   | 53 +++
 .../Target/LoongArch/LoongArchISelLowering.h  |  2 +
 .../LoongArch/LoongArchLASXInstrInfo.td   | 22 +++-
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5b2d185594f44..de2a27143c389 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2434,6 +2434,7 @@ static SDValue 
lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
   BuildVectorSDNode *Node = cast(Op);
+  MVT VT = Node->getSimpleValueType(0);
   EVT ResTy = Op->getValueType(0);
   unsigned NumElts = ResTy.getVectorNumElements();
   SDLoc DL(Op);
@@ -2517,6 +2518,56 @@ SDValue 
LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
   }
 
   if (!IsConstant) {
+// If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to 
fill
+// the sub-sequence of the vector and then broadcast the sub-sequence.
+SmallVector Sequence;
+BitVector UndefElements;
+if (Node->getRepeatedSequence(Sequence, &UndefElements)) {
+  // TODO: If the BUILD_VECTOR contains undef elements, consider falling
+  // back to use INSERT_VECTOR_ELT to materialize the vector, because it
+  // generates worse code in some cases. This could be further optimized
+  // with more consideration.
+  if (UndefElements.count() == 0) {
+unsigned SeqLen = Sequence.size();
+
+SDValue Op0 = Sequence[0];
+SDValue Vector = DAG.getUNDEF(ResTy);
+if (!Op0.isUndef())
+  Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
+for (unsigned i = 1; i < SeqLen; ++i) {
+  SDValue Opi = Sequence[i];
+  if (Opi.isUndef())
+continue;
+  Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
+   DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+}
+
+unsigned SplatLen = NumElts / SeqLen;
+MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
+MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
+
+// If size of the sub-sequence is half of a 256-bits vector, bitcast 
the
+// vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
+if (SplatEltTy == MVT::i128)
+  SplatTy = MVT::v4i64;
+
+SDValue SrcVec = DAG.getBitcast(SplatTy, Vector);
+SDValue SplatVec;
+if (SplatTy.is256BitVector()) {
+  SplatVec =
+  DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
+: LoongArchISD::XVREPLVE0,
+  DL, SplatTy, SrcVec);
+} else {
+  SplatVec =
+  DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
+  DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+}
+
+return DAG.getBitcast(ResTy, SplatVec);
+  }
+}
+
 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
 // The resulting code is the same length as the expansion, but it doesn't
 // use memory operations.
@@ -6637,6 +6688,8 @@ const char 
*LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
 NODE_NAME_CASE(VREPLVEI)
 NODE_NAME_CASE(VREPLGR2VR)
 NODE_NAME_CASE(XVPERMI)
+NODE_NAME_CASE(XVREPLVE0)
+NODE_NAME_CASE(XVREPLVE0Q)
 NODE_NAME_CASE(VPICK_SEXT_ELT)
 NODE_NAME_CASE(VPICK_ZEXT_ELT)
 NODE_NAME_CASE(VREPLVE)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index f79ba7450cc36..9ab867a918f4e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -141,6 +141,8 @@ enum NodeType : unsigned {
   VREPLVEI,
   VREPLGR2VR,
   XVPERMI,
+  XVREPLVE0,
+  XVREPLVE0Q,
 
   // Extended vector element extraction
   VPICK_SEXT_ELT,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 0696b11d62ac9..962448fcb470d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -10,8 +10,13 @@
 //
 
//===--===//
 
+def SDT_LoongArchXVREPLVE0 : SDTypeProfile<1, 1, [SD

[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor cospif16 implementation to header-only in src/__support/math folder. (PR #154222)

2025-08-20 Thread via llvm-branch-commits

https://github.com/lntue approved this pull request.


https://github.com/llvm/llvm-project/pull/154222
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Kareem Ergawy via llvm-branch-commits


@@ -813,6 +874,13 @@ void OmpStructureChecker::Enter(const 
parser::OpenMPBlockConstruct &x) {
   "TARGET construct with nested TEAMS region contains statements or "
   "directives outside of the TEAMS construct"_err_en_US);
 }
+if (GetContext().directive == llvm::omp::Directive::OMPD_workdistribute &&
+GetContextParent().directive != llvm::omp::Directive::OMPD_teams) {
+  context_.Say(x.BeginDir().DirName().source,
+  "%s region can only be strictly nested within the "
+  "teams region"_err_en_US,

ergawy wrote:

nit: I think we capitalize construct keywords and do not use `the` in front of 
them. Just to be consistent with the rest of the error messages.
```suggestion
  "%s region can only be strictly nested within TEAMS region"_err_en_US,
```

https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Kareem Ergawy via llvm-branch-commits


@@ -896,6 +964,17 @@ void OmpStructureChecker::Enter(const 
parser::OpenMPBlockConstruct &x) {
 HasInvalidWorksharingNesting(
 beginSpec.source, llvm::omp::nestedWorkshareErrSet);
 break;
+  case llvm::omp::OMPD_workdistribute:
+if (!CurrentDirectiveIsNested()) {
+  context_.Say(beginSpec.source,
+  "A workdistribute region must be nested inside teams region 
only."_err_en_US);

ergawy wrote:

nit: capitalization of construct names here as well.

https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang] return type not correctly deduced for discarded lambdas (#153921) (PR #154080)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/154080
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Local] preserve `MD_prof` in `hoistAllInstructionsInto` (PR #154635)

2025-08-20 Thread Mircea Trofin via llvm-branch-commits


@@ -498,6 +498,7 @@ LLVM_ABI void dropDebugUsers(Instruction &I);
 ///
 /// The moved instructions receive the insertion point debug location values
 /// (DILocations) and their debug intrinsic instructions are removed.
+/// Selects and indirect calls keep their MD_prof metadata.

mtrofin wrote:

I believe this is different from 
https://github.com/llvm/llvm-project/pull/152420#discussion_r2264594682 because 
this is less generic - there, it was about whether a caller asking to 
`dropUnknown...Metadata` would be surprised to get `MD_prof` still there. Here, 
I'd argue the API is specifically about hoisting, and it's more reasonable to 
keep around some metadata. @nikic 

https://github.com/llvm/llvm-project/pull/154635
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [analyzer][docs] CSA release notes for clang-21 (PR #154600)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru approved this pull request.


https://github.com/llvm/llvm-project/pull/154600
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [TailDup] Delay aggressive computed-goto taildup to after RegAlloc. (#150911) (PR #151680)

2025-08-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@fhahn (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/151680
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang][PAC][ObjC] Merge the block metadata support for the arm64e abi to llvm 21 (PR #153725)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

ping @AaronBallman for review

https://github.com/llvm/llvm-project/pull/153725
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] c587c24 - [TailDup] Delay aggressive computed-goto taildup to after RegAlloc. (#150911)

2025-08-20 Thread Florian Hahn via llvm-branch-commits

Author: Florian Hahn
Date: 2025-08-20T11:06:54+01:00
New Revision: c587c24db5cf31a0c45a475b05b20b4b724b0cec

URL: 
https://github.com/llvm/llvm-project/commit/c587c24db5cf31a0c45a475b05b20b4b724b0cec
DIFF: 
https://github.com/llvm/llvm-project/commit/c587c24db5cf31a0c45a475b05b20b4b724b0cec.diff

LOG: [TailDup] Delay aggressive computed-goto taildup to after RegAlloc. 
(#150911)

Back-ports additional tests (eb9febb4a6b0, dc697de12792), refactoring
(43c9c14577db) and functional change (18f1369297f4) in a single PR.

https://github.com/llvm/llvm-project/pull/114990 allowed more aggressive
tail duplication for computed-gotos in both pre- and post-regalloc tail
duplication.

In some cases, performing tail-duplication too early can lead to worse
results, especially if we duplicate blocks with a number of phi nodes.

This is causing a ~3% performance regression in some workloads using
Python 3.12.

This patch updates TailDup to delay aggressive tail-duplication for
computed gotos to after register allocation.

This means we can keep the non-duplicated version for a bit longer
throughout the backend, which should reduce compile-time as well as
allowing a number of optimizations and simplifications to trigger before
drastically expanding the CFG.

For the case in https://github.com/llvm/llvm-project/issues/106846, I
get the same performance with and without this patch on Skylake.

PR: https://github.com/llvm/llvm-project/pull/150911

Added: 
llvm/test/CodeGen/AArch64/late-taildup-computed-goto.ll
llvm/test/CodeGen/X86/early-tail-dup-computed-goto.mir
llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir

Modified: 
llvm/include/llvm/CodeGen/MachineBasicBlock.h
llvm/lib/CodeGen/TailDuplicator.cpp

Removed: 
llvm/test/CodeGen/X86/tail-dup-computed-goto.mir



diff  --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h 
b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 938d71dd030e8..9e3d9196cc184 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -323,10 +323,11 @@ class MachineBasicBlock
   const MachineFunction *getParent() const { return xParent; }
   MachineFunction *getParent() { return xParent; }
 
-  /// Returns true if the original IR terminator is an `indirectbr`. This
-  /// typically corresponds to a `goto` in C, rather than jump tables.
-  bool terminatorIsComputedGoto() const {
-return back().isIndirectBranch() &&
+  /// Returns true if the original IR terminator is an `indirectbr` with
+  /// successor blocks. This typically corresponds to a `goto` in C, rather 
than
+  /// jump tables.
+  bool terminatorIsComputedGotoWithSuccessors() const {
+return back().isIndirectBranch() && !succ_empty() &&
llvm::all_of(successors(), [](const MachineBasicBlock *Succ) {
  return Succ->isIRBlockAddressTaken();
});

diff  --git a/llvm/lib/CodeGen/TailDuplicator.cpp 
b/llvm/lib/CodeGen/TailDuplicator.cpp
index a88c57fdc165a..8cbdadd979810 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -604,12 +604,23 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   bool HasComputedGoto = false;
   if (!TailBB.empty()) {
 HasIndirectbr = TailBB.back().isIndirectBranch();
-HasComputedGoto = TailBB.terminatorIsComputedGoto();
+HasComputedGoto = TailBB.terminatorIsComputedGotoWithSuccessors();
   }
 
   if (HasIndirectbr && PreRegAlloc)
 MaxDuplicateCount = TailDupIndirectBranchSize;
 
+  // Allow higher limits when the block has computed-gotos and running after
+  // register allocation. NB. This basically unfactors computed gotos that were
+  // factored early on in the compilation process to speed up edge based data
+  // flow. If we do not unfactor them again, it can seriously pessimize code
+  // with many computed jumps in the source code, such as interpreters.
+  // Therefore we do not restrict the computed gotos.
+  bool DupComputedGotoLate =
+  HasComputedGoto && MF->getTarget().getTargetTriple().isOSDarwin();
+  if (DupComputedGotoLate && !PreRegAlloc)
+MaxDuplicateCount = std::max(MaxDuplicateCount, 10u);
+
   // Check the instructions in the block to determine whether tail-duplication
   // is invalid or unlikely to be profitable.
   unsigned InstrCount = 0;
@@ -663,12 +674,10 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   // Duplicating a BB which has both multiple predecessors and successors will
   // may cause huge amount of PHI nodes. If we want to remove this limitation,
   // we have to address https://github.com/llvm/llvm-project/issues/78578.
-  // NB. This basically unfactors computed gotos that were factored early on in
-  // the compilation process to speed up edge based data flow. If we do not
-  // unfactor them again, it can seriously pessimize code with many computed
-  // jumps in the source code, su

[llvm-branch-commits] [llvm] release/21.x: [TailDup] Delay aggressive computed-goto taildup to after RegAlloc. (#150911) (PR #151680)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/151680
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [WIP] [clang] Align cleanup structs to prevent SIGBUS on sparc32 (#152866) (PR #154002)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

ping @efriedma-quic for review.

https://github.com/llvm/llvm-project/pull/154002
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

ping @quic-akaryaki for review.

https://github.com/llvm/llvm-project/pull/153926
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang] Allow trivial pp-directives before C++ module directive (#153641) (PR #154077)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/154077

>From fe59f72b9ac32b1c6b993ecac26c29bdfcf27407 Mon Sep 17 00:00:00 2001
From: yronglin 
Date: Mon, 18 Aug 2025 14:17:35 +0800
Subject: [PATCH] [clang] Allow trivial pp-directives before C++ module
 directive (#153641)

Consider the following code:

```cpp
# 1 __FILE__ 1 3
export module a;
```

According to the wording in
[P1857R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p1857r3.html):
```
A module directive may only appear as the first preprocessing tokens in a file 
(excluding the global module fragment.)
```

and the wording in
[[cpp.pre]](https://eel.is/c++draft/cpp.pre#nt:module-file)
```
module-file:
pp-global-module-fragment[opt] pp-module group[opt] 
pp-private-module-fragment[opt]
```

`#` is the first pp-token in the translation unit, and it was rejected
by clang, but they really should be exempted from this rule. The goal is
to not allow any preprocessor conditionals or most state changes, but
these don't fit that.

State change would mean most semantically observable preprocessor state,
particularly anything that is order dependent. Global flags like being a
system header/module shouldn't matter.

We should exempt a bunch of directives, even though it violates the
current standard wording.

In this patch, we introduce a `TrivialDirectiveTracer` to trace the
**State change** that described above and propose to exempt the
following kind of directive: `#line`, GNU line marker, `#ident`,
`#pragma comment`, `#pragma mark`, `#pragma detect_mismatch`, `#pragma
clang __debug`, `#pragma message`, `#pragma GCC warning`, `#pragma GCC
error`, `#pragma gcc diagnostic`, `#pragma OPENCL EXTENSION`, `#pragma
warning`, `#pragma execution_character_set`, `#pragma clang
assume_nonnull` and builtin macro expansion.

Fixes https://github.com/llvm/llvm-project/issues/145274

-

Signed-off-by: yronglin 
(cherry picked from commit e6e874ce8f055f5b8c5d7f8c7fb0afe764d1d350)
---
 clang/include/clang/Lex/Lexer.h   |   3 -
 .../clang/Lex/NoTrivialPPDirectiveTracer.h| 310 ++
 clang/include/clang/Lex/Preprocessor.h|  12 +
 clang/include/clang/Lex/Token.h   |  17 +-
 clang/include/clang/Sema/Sema.h   |   2 +-
 clang/lib/Lex/Lexer.cpp   |   9 -
 clang/lib/Lex/Preprocessor.cpp|  40 ++-
 clang/lib/Parse/Parser.cpp|   8 +-
 clang/lib/Sema/SemaModule.cpp |   6 +-
 clang/test/CXX/module/cpp.pre/module_decl.cpp | 141 +++-
 clang/unittests/Lex/CMakeLists.txt|   1 +
 clang/unittests/Lex/LexerTest.cpp |   4 +-
 clang/unittests/Lex/ModuleDeclStateTest.cpp   | 128 
 .../Lex/NoTrivialPPDirectiveTracerTest.cpp| 183 +++
 14 files changed, 763 insertions(+), 101 deletions(-)
 create mode 100644 clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h
 create mode 100644 clang/unittests/Lex/NoTrivialPPDirectiveTracerTest.cpp

diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 06971ff87ab96..423f2ffe2f852 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -143,9 +143,6 @@ class Lexer : public PreprocessorLexer {
   /// True if this is the first time we're lexing the input file.
   bool IsFirstTimeLexingFile;
 
-  /// True if current lexing token is the first pp-token.
-  bool IsFirstPPToken;
-
   // NewLinePtr - A pointer to new line character '\n' being lexed. For '\r\n',
   // it also points to '\n.'
   const char *NewLinePtr;
diff --git a/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h 
b/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h
new file mode 100644
index 0..9ab3c6a528a1a
--- /dev/null
+++ b/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h
@@ -0,0 +1,310 @@
+//===--- NoTrivialPPDirectiveTracer.h ---*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+//  This file defines the NoTrivialPPDirectiveTracer interface.
+//
+//===--===//
+
+#ifndef LLVM_CLANG_LEX_NO_TRIVIAL_PPDIRECTIVE_TRACER_H
+#define LLVM_CLANG_LEX_NO_TRIVIAL_PPDIRECTIVE_TRACER_H
+
+#include "clang/Lex/PPCallbacks.h"
+
+namespace clang {
+class Preprocessor;
+
+/// Consider the following code:
+///
+/// # 1 __FILE__ 1 3
+/// export module a;
+///
+/// According to the wording in
+/// 
[P1857R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p1857r3.html):
+///
+///   A module directive may only appear as the first preprocessing tokens in a
+///   file (excluding the global module fragment.)
+///
+/// and the wording in
+/// [[cpp.pre

[llvm-branch-commits] [clang] fe59f72 - [clang] Allow trivial pp-directives before C++ module directive (#153641)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

Author: yronglin
Date: 2025-08-21T08:04:17+02:00
New Revision: fe59f72b9ac32b1c6b993ecac26c29bdfcf27407

URL: 
https://github.com/llvm/llvm-project/commit/fe59f72b9ac32b1c6b993ecac26c29bdfcf27407
DIFF: 
https://github.com/llvm/llvm-project/commit/fe59f72b9ac32b1c6b993ecac26c29bdfcf27407.diff

LOG: [clang] Allow trivial pp-directives before C++ module directive (#153641)

Consider the following code:

```cpp
# 1 __FILE__ 1 3
export module a;
```

According to the wording in
[P1857R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p1857r3.html):
```
A module directive may only appear as the first preprocessing tokens in a file 
(excluding the global module fragment.)
```

and the wording in
[[cpp.pre]](https://eel.is/c++draft/cpp.pre#nt:module-file)
```
module-file:
pp-global-module-fragment[opt] pp-module group[opt] 
pp-private-module-fragment[opt]
```

`#` is the first pp-token in the translation unit, and it was rejected
by clang, but they really should be exempted from this rule. The goal is
to not allow any preprocessor conditionals or most state changes, but
these don't fit that.

State change would mean most semantically observable preprocessor state,
particularly anything that is order dependent. Global flags like being a
system header/module shouldn't matter.

We should exempt a bunch of directives, even though it violates the
current standard wording.

In this patch, we introduce a `TrivialDirectiveTracer` to trace the
**State change** that described above and propose to exempt the
following kind of directive: `#line`, GNU line marker, `#ident`,
`#pragma comment`, `#pragma mark`, `#pragma detect_mismatch`, `#pragma
clang __debug`, `#pragma message`, `#pragma GCC warning`, `#pragma GCC
error`, `#pragma gcc diagnostic`, `#pragma OPENCL EXTENSION`, `#pragma
warning`, `#pragma execution_character_set`, `#pragma clang
assume_nonnull` and builtin macro expansion.

Fixes https://github.com/llvm/llvm-project/issues/145274

-

Signed-off-by: yronglin 
(cherry picked from commit e6e874ce8f055f5b8c5d7f8c7fb0afe764d1d350)

Added: 
clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h
clang/unittests/Lex/NoTrivialPPDirectiveTracerTest.cpp

Modified: 
clang/include/clang/Lex/Lexer.h
clang/include/clang/Lex/Preprocessor.h
clang/include/clang/Lex/Token.h
clang/include/clang/Sema/Sema.h
clang/lib/Lex/Lexer.cpp
clang/lib/Lex/Preprocessor.cpp
clang/lib/Parse/Parser.cpp
clang/lib/Sema/SemaModule.cpp
clang/test/CXX/module/cpp.pre/module_decl.cpp
clang/unittests/Lex/CMakeLists.txt
clang/unittests/Lex/LexerTest.cpp
clang/unittests/Lex/ModuleDeclStateTest.cpp

Removed: 




diff  --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
index 06971ff87ab96..423f2ffe2f852 100644
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -143,9 +143,6 @@ class Lexer : public PreprocessorLexer {
   /// True if this is the first time we're lexing the input file.
   bool IsFirstTimeLexingFile;
 
-  /// True if current lexing token is the first pp-token.
-  bool IsFirstPPToken;
-
   // NewLinePtr - A pointer to new line character '\n' being lexed. For '\r\n',
   // it also points to '\n.'
   const char *NewLinePtr;

diff  --git a/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h 
b/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h
new file mode 100644
index 0..9ab3c6a528a1a
--- /dev/null
+++ b/clang/include/clang/Lex/NoTrivialPPDirectiveTracer.h
@@ -0,0 +1,310 @@
+//===--- NoTrivialPPDirectiveTracer.h ---*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+//  This file defines the NoTrivialPPDirectiveTracer interface.
+//
+//===--===//
+
+#ifndef LLVM_CLANG_LEX_NO_TRIVIAL_PPDIRECTIVE_TRACER_H
+#define LLVM_CLANG_LEX_NO_TRIVIAL_PPDIRECTIVE_TRACER_H
+
+#include "clang/Lex/PPCallbacks.h"
+
+namespace clang {
+class Preprocessor;
+
+/// Consider the following code:
+///
+/// # 1 __FILE__ 1 3
+/// export module a;
+///
+/// According to the wording in
+/// 
[P1857R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p1857r3.html):
+///
+///   A module directive may only appear as the first preprocessing tokens in a
+///   file (excluding the global module fragment.)
+///
+/// and the wording in
+/// [[cpp.pre]](https://eel.is/c++draft/cpp.pre#nt:module-file):
+///   module-file:
+/// pp-global-module-fragment[opt] pp-module group[opt]
+/// pp-private-module-fragment[opt]
+///
+/// `#` is the first pp-token in the translation unit, and it was rejected by
+

[llvm-branch-commits] [clang] 16bea73 - [clang] return type not correctly deduced for discarded lambdas (#153921)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

Author: Oliver Hunt
Date: 2025-08-21T08:06:21+02:00
New Revision: 16bea73be5aeab1cac87a7f73d84d63a8ec438a7

URL: 
https://github.com/llvm/llvm-project/commit/16bea73be5aeab1cac87a7f73d84d63a8ec438a7
DIFF: 
https://github.com/llvm/llvm-project/commit/16bea73be5aeab1cac87a7f73d84d63a8ec438a7.diff

LOG: [clang] return type not correctly deduced for discarded lambdas (#153921)

The early return for lambda expressions with deduced return types in
Sema::ActOnCapScopeReturnStmt meant that we were not actually performing
the required return type deduction for such lambdas when in a discarded
context.

This PR removes that early return allowing the existing return type
deduction steps to be performed.

Fixes #153884

Fix developed by, and

Co-authored-by: Corentin Jabot 
(cherry picked from commit bcab8ac126c0b4c439caa3104d66d33d0f70f86f)

Added: 


Modified: 
clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
clang/test/CXX/stmt.stmt/stmt.select/stmt.if/p2.cpp

Removed: 




diff  --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp 
b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index e2c3cdcd536bc..d2b87f2702a9c 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -5685,7 +5685,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation 
PointOfInstantiation,
   };
   Function->setDeclarationNameLoc(NameLocPointsToPattern());
 
-  EnterExpressionEvaluationContext EvalContext(
+  EnterExpressionEvaluationContextForFunction EvalContext(
   *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
 
   Qualifiers ThisTypeQuals;

diff  --git a/clang/test/CXX/stmt.stmt/stmt.select/stmt.if/p2.cpp 
b/clang/test/CXX/stmt.stmt/stmt.select/stmt.if/p2.cpp
index abb42447d3e0b..05830de9891fe 100644
--- a/clang/test/CXX/stmt.stmt/stmt.select/stmt.if/p2.cpp
+++ b/clang/test/CXX/stmt.stmt/stmt.select/stmt.if/p2.cpp
@@ -239,5 +239,21 @@ void f2() {
 
 }
 
+namespace GH153884 {
+  bool f1() {
+auto f = [](auto) { return true; };
+if constexpr (0)
+  return f(1);
+return false;
+  }
+  bool f2() {
+auto f = [](auto x) { if (x) return 1.5; else return "wat"; };
+// expected-error@-1 {{'auto' in return type deduced as 'const char *' 
here but deduced as 'double' in earlier return statement}}
+if constexpr (0)
+  return f(1);
+// expected-note@-1 {{in instantiation of function template specialization 
'GH153884::f2()}}
+return false;
+  }
+}
 
 #endif



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang] return type not correctly deduced for discarded lambdas (#153921) (PR #154080)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/154080

>From 16bea73be5aeab1cac87a7f73d84d63a8ec438a7 Mon Sep 17 00:00:00 2001
From: Oliver Hunt 
Date: Mon, 18 Aug 2025 02:07:27 -0700
Subject: [PATCH] [clang] return type not correctly deduced for discarded
 lambdas (#153921)

The early return for lambda expressions with deduced return types in
Sema::ActOnCapScopeReturnStmt meant that we were not actually performing
the required return type deduction for such lambdas when in a discarded
context.

This PR removes that early return allowing the existing return type
deduction steps to be performed.

Fixes #153884

Fix developed by, and

Co-authored-by: Corentin Jabot 
(cherry picked from commit bcab8ac126c0b4c439caa3104d66d33d0f70f86f)
---
 clang/lib/Sema/SemaTemplateInstantiateDecl.cpp   |  2 +-
 .../CXX/stmt.stmt/stmt.select/stmt.if/p2.cpp | 16 
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp 
b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index e2c3cdcd536bc..d2b87f2702a9c 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -5685,7 +5685,7 @@ void Sema::InstantiateFunctionDefinition(SourceLocation 
PointOfInstantiation,
   };
   Function->setDeclarationNameLoc(NameLocPointsToPattern());
 
-  EnterExpressionEvaluationContext EvalContext(
+  EnterExpressionEvaluationContextForFunction EvalContext(
   *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
 
   Qualifiers ThisTypeQuals;
diff --git a/clang/test/CXX/stmt.stmt/stmt.select/stmt.if/p2.cpp 
b/clang/test/CXX/stmt.stmt/stmt.select/stmt.if/p2.cpp
index abb42447d3e0b..05830de9891fe 100644
--- a/clang/test/CXX/stmt.stmt/stmt.select/stmt.if/p2.cpp
+++ b/clang/test/CXX/stmt.stmt/stmt.select/stmt.if/p2.cpp
@@ -239,5 +239,21 @@ void f2() {
 
 }
 
+namespace GH153884 {
+  bool f1() {
+auto f = [](auto) { return true; };
+if constexpr (0)
+  return f(1);
+return false;
+  }
+  bool f2() {
+auto f = [](auto x) { if (x) return 1.5; else return "wat"; };
+// expected-error@-1 {{'auto' in return type deduced as 'const char *' 
here but deduced as 'double' in earlier return statement}}
+if constexpr (0)
+  return f(1);
+// expected-note@-1 {{in instantiation of function template specialization 
'GH153884::f2()}}
+return false;
+  }
+}
 
 #endif

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang][PAC] ptrauth_qualifier and ptrauth_intrinsic should only be available on Darwin (#153912) (PR #154198)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/154198
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang][PAC] ptrauth_qualifier and ptrauth_intrinsic should only be available on Darwin (#153912) (PR #154198)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/154198

>From 45300aad3ca6fe67bc1c9ae40af16f30843cbbfc Mon Sep 17 00:00:00 2001
From: Oliver Hunt 
Date: Mon, 18 Aug 2025 13:29:26 -0700
Subject: [PATCH] [clang][PAC] ptrauth_qualifier and ptrauth_intrinsic should
 only be available on Darwin (#153912)

For backwards compatibility reasons the `ptrauth_qualifier` and
`ptrauth_intrinsic` features need to be testable with `__has_feature()`
on Apple platforms, but for other platforms this backwards compatibility
issue does not exist.

This PR resolves these issues by making the `ptrauth_qualifier` and
`ptrauth_intrinsic` tests conditional upon a darwin target. This also
allows us to revert the ptrauth_qualifier check from an extension to a
feature test again, as is required on these platforms.

At the same time we introduce a new predefined macro `__PTRAUTH__` that
answers the same question as `__has_feature(ptrauth_qualifier)` and
`__has_feature(ptrauth_intrinsic)` as those tests are synonymous and
only exist separately for compatibility reasons.

The requirement to test for the `__PTRAUTH__` macro also resolves the
hazard presented by mixing the `ptrauth_qualifier` flag (that impacts
ABI and security policies) with `-pedantics-errors`, which makes
`__has_extension` return false for all extensions.

-

Co-authored-by: Aaron Ballman 
(cherry picked from commit 624b724ca6df5d2d3ea16b9ed232851e5d061be4)
---
 clang/docs/ReleaseNotes.rst |  6 +
 clang/include/clang/Basic/Features.def  |  6 +++--
 clang/lib/Frontend/InitPreprocessor.cpp |  3 +++
 clang/lib/Headers/ptrauth.h |  4 +--
 clang/test/Preprocessor/ptrauth_extension.c | 30 ++---
 clang/test/Preprocessor/ptrauth_feature.c   |  2 +-
 clang/test/Sema/ptrauth-qualifier.c | 16 +--
 clang/test/SemaObjC/ptrauth-qualifier.m | 16 +--
 8 files changed, 70 insertions(+), 13 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f4f7dd8342d92..37c5f429274ce 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -359,6 +359,12 @@ Non-comprehensive list of changes in this release
   ARC-managed pointers and other pointer types. The prior behavior was overly
   strict and inconsistent with the ARC specification.
 
+- Use of ``__has_feature`` to detect the ``ptrauth_qualifier`` and 
``ptrauth_intrinsics``
+  features has been deprecated, and is restricted to the arm64e target only. 
The
+  correct method to check for these features is to test for the ``__PTRAUTH__``
+  macro.
+
+
 New Compiler Flags
 --
 
diff --git a/clang/include/clang/Basic/Features.def 
b/clang/include/clang/Basic/Features.def
index 05dc08945b2f0..5cc3569ab4a94 100644
--- a/clang/include/clang/Basic/Features.def
+++ b/clang/include/clang/Basic/Features.def
@@ -147,8 +147,10 @@ FEATURE(type_sanitizer, 
LangOpts.Sanitize.has(SanitizerKind::Type))
 FEATURE(thread_sanitizer, LangOpts.Sanitize.has(SanitizerKind::Thread))
 FEATURE(dataflow_sanitizer, LangOpts.Sanitize.has(SanitizerKind::DataFlow))
 FEATURE(scudo, LangOpts.Sanitize.hasOneOf(SanitizerKind::Scudo))
-FEATURE(ptrauth_intrinsics, LangOpts.PointerAuthIntrinsics)
-EXTENSION(ptrauth_qualifier, LangOpts.PointerAuthIntrinsics)
+FEATURE(ptrauth_intrinsics, LangOpts.PointerAuthIntrinsics &&
+PP.getTargetInfo().getTriple().isOSDarwin())
+FEATURE(ptrauth_qualifier, LangOpts.PointerAuthIntrinsics &&
+   PP.getTargetInfo().getTriple().isOSDarwin())
 FEATURE(ptrauth_calls, LangOpts.PointerAuthCalls)
 FEATURE(ptrauth_returns, LangOpts.PointerAuthReturns)
 FEATURE(ptrauth_vtable_pointer_address_discrimination, 
LangOpts.PointerAuthVTPtrAddressDiscrimination)
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp 
b/clang/lib/Frontend/InitPreprocessor.cpp
index 34fb825e9d420..cce8392950b03 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1535,6 +1535,9 @@ static void InitializePredefinedMacros(const TargetInfo 
&TI,
 #undef TARGET_OS
   }
 
+  if (LangOpts.PointerAuthIntrinsics)
+Builder.defineMacro("__PTRAUTH__");
+
   // Get other target #defines.
   TI.getTargetDefines(LangOpts, Builder);
 }
diff --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h
index 7f7d387cbdfda..f902ca1e3bbd3 100644
--- a/clang/lib/Headers/ptrauth.h
+++ b/clang/lib/Headers/ptrauth.h
@@ -95,7 +95,7 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
__ptrauth qualifier; the compiler will perform this check
automatically. */
 
-#if __has_feature(ptrauth_intrinsics)
+#if __has_feature(ptrauth_intrinsics) || defined(__PTRAUTH__)
 
 /* Strip the signature from a value without authenticating it.
 
@@ -388,6 +388,6 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
 #define __ptrauth_objc_isa_uintptr
 #define __ptrauth_objc_super_pointer
 
-#endif /* __has_feature

[llvm-branch-commits] [clang] 45300aa - [clang][PAC] ptrauth_qualifier and ptrauth_intrinsic should only be available on Darwin (#153912)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

Author: Oliver Hunt
Date: 2025-08-21T08:07:17+02:00
New Revision: 45300aad3ca6fe67bc1c9ae40af16f30843cbbfc

URL: 
https://github.com/llvm/llvm-project/commit/45300aad3ca6fe67bc1c9ae40af16f30843cbbfc
DIFF: 
https://github.com/llvm/llvm-project/commit/45300aad3ca6fe67bc1c9ae40af16f30843cbbfc.diff

LOG: [clang][PAC] ptrauth_qualifier and ptrauth_intrinsic should only be 
available on Darwin (#153912)

For backwards compatibility reasons the `ptrauth_qualifier` and
`ptrauth_intrinsic` features need to be testable with `__has_feature()`
on Apple platforms, but for other platforms this backwards compatibility
issue does not exist.

This PR resolves these issues by making the `ptrauth_qualifier` and
`ptrauth_intrinsic` tests conditional upon a darwin target. This also
allows us to revert the ptrauth_qualifier check from an extension to a
feature test again, as is required on these platforms.

At the same time we introduce a new predefined macro `__PTRAUTH__` that
answers the same question as `__has_feature(ptrauth_qualifier)` and
`__has_feature(ptrauth_intrinsic)` as those tests are synonymous and
only exist separately for compatibility reasons.

The requirement to test for the `__PTRAUTH__` macro also resolves the
hazard presented by mixing the `ptrauth_qualifier` flag (that impacts
ABI and security policies) with `-pedantic-errors`, which makes
`__has_extension` return false for all extensions.

-

Co-authored-by: Aaron Ballman 
(cherry picked from commit 624b724ca6df5d2d3ea16b9ed232851e5d061be4)

Added: 


Modified: 
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/Features.def
clang/lib/Frontend/InitPreprocessor.cpp
clang/lib/Headers/ptrauth.h
clang/test/Preprocessor/ptrauth_extension.c
clang/test/Preprocessor/ptrauth_feature.c
clang/test/Sema/ptrauth-qualifier.c
clang/test/SemaObjC/ptrauth-qualifier.m

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f4f7dd8342d92..37c5f429274ce 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -359,6 +359,12 @@ Non-comprehensive list of changes in this release
   ARC-managed pointers and other pointer types. The prior behavior was overly
   strict and inconsistent with the ARC specification.
 
+- Use of ``__has_feature`` to detect the ``ptrauth_qualifier`` and 
``ptrauth_intrinsics``
+  features has been deprecated, and is restricted to the arm64e target only. 
The
+  correct method to check for these features is to test for the ``__PTRAUTH__``
+  macro.
+
+
 New Compiler Flags
 --
 

diff  --git a/clang/include/clang/Basic/Features.def 
b/clang/include/clang/Basic/Features.def
index 05dc08945b2f0..5cc3569ab4a94 100644
--- a/clang/include/clang/Basic/Features.def
+++ b/clang/include/clang/Basic/Features.def
@@ -147,8 +147,10 @@ FEATURE(type_sanitizer, 
LangOpts.Sanitize.has(SanitizerKind::Type))
 FEATURE(thread_sanitizer, LangOpts.Sanitize.has(SanitizerKind::Thread))
 FEATURE(dataflow_sanitizer, LangOpts.Sanitize.has(SanitizerKind::DataFlow))
 FEATURE(scudo, LangOpts.Sanitize.hasOneOf(SanitizerKind::Scudo))
-FEATURE(ptrauth_intrinsics, LangOpts.PointerAuthIntrinsics)
-EXTENSION(ptrauth_qualifier, LangOpts.PointerAuthIntrinsics)
+FEATURE(ptrauth_intrinsics, LangOpts.PointerAuthIntrinsics &&
+PP.getTargetInfo().getTriple().isOSDarwin())
+FEATURE(ptrauth_qualifier, LangOpts.PointerAuthIntrinsics &&
+   PP.getTargetInfo().getTriple().isOSDarwin())
 FEATURE(ptrauth_calls, LangOpts.PointerAuthCalls)
 FEATURE(ptrauth_returns, LangOpts.PointerAuthReturns)
 FEATURE(ptrauth_vtable_pointer_address_discrimination, 
LangOpts.PointerAuthVTPtrAddressDiscrimination)

diff  --git a/clang/lib/Frontend/InitPreprocessor.cpp 
b/clang/lib/Frontend/InitPreprocessor.cpp
index 34fb825e9d420..cce8392950b03 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1535,6 +1535,9 @@ static void InitializePredefinedMacros(const TargetInfo 
&TI,
 #undef TARGET_OS
   }
 
+  if (LangOpts.PointerAuthIntrinsics)
+Builder.defineMacro("__PTRAUTH__");
+
   // Get other target #defines.
   TI.getTargetDefines(LangOpts, Builder);
 }

diff  --git a/clang/lib/Headers/ptrauth.h b/clang/lib/Headers/ptrauth.h
index 7f7d387cbdfda..f902ca1e3bbd3 100644
--- a/clang/lib/Headers/ptrauth.h
+++ b/clang/lib/Headers/ptrauth.h
@@ -95,7 +95,7 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
__ptrauth qualifier; the compiler will perform this check
automatically. */
 
-#if __has_feature(ptrauth_intrinsics)
+#if __has_feature(ptrauth_intrinsics) || defined(__PTRAUTH__)
 
 /* Strip the signature from a value without authenticating it.
 
@@ -388,6 +388,6 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
 #define __ptrauth_objc_isa_uintptr
 #define __ptrauth_objc_super_pointer
 
-#endif /* __h

[llvm-branch-commits] [llvm] release/21.x: [POWERPC] Fixes an error in the handling of the MTVSRBMI instruction for big-endian (#151565) (PR #154138)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/154138

>From 2cf40a6ca83a24fbb773992f68c51808a3ea2610 Mon Sep 17 00:00:00 2001
From: zhijian lin 
Date: Wed, 6 Aug 2025 09:36:37 -0400
Subject: [PATCH] [POWERPC] Fixes an error in the handling of the MTVSRBMI
 instruction for big-endian  (#151565)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The patch fixes a bug introduced by the patch [[PowerPC] using MTVSRBMI
instruction instead of constant pool in
power10+](https://github.com/llvm/llvm-project/pull/144084#top).

The issue arose because the layout of vector register elements differs
between little-endian and big-endian modes — specifically, the elements
appear in reverse order. This led to incorrect behavior when loading
constants using MTVSRBMI in big-endian configurations.

(cherry picked from commit 23b320311364f1bc1249500c7542d077d70098bf)
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 15 ++--
 llvm/test/CodeGen/PowerPC/mtvsrbmi.ll   | 87 ++---
 2 files changed, 86 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f179873b4dbd2..67f59ed507f38 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9594,12 +9594,14 @@ static bool isValidSplatLoad(const PPCSubtarget 
&Subtarget, const SDValue &Op,
   return false;
 }
 
-bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
+bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
+ bool IsLittleEndian) {
   assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
 
   BitMask.clearAllBits();
   EVT VT = BVN.getValueType(0);
-  APInt ConstValue(VT.getSizeInBits(), 0);
+  unsigned VTSize = VT.getSizeInBits();
+  APInt ConstValue(VTSize, 0);
 
   unsigned EltWidth = VT.getScalarSizeInBits();
 
@@ -9609,8 +9611,10 @@ bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode 
&BVN) {
 
 if (!CN)
   return false;
-
-ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+// The elements in a vector register are ordered in reverse byte order
+// between little-endian and big-endian modes.
+ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
+  IsLittleEndian ? BitPos : VTSize - EltWidth - 
BitPos);
 BitPos += EltWidth;
   }
 
@@ -9641,7 +9645,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
 // we do not convert it to MTVSRBMI.
 // The xxleqv instruction sets a vector with all ones.
 // The xxlxor instruction sets a vector with all zeros.
-if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0x) {
+if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
+BitMask != 0 && BitMask != 0x) {
   SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
   MachineSDNode *MSDNode =
   DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
diff --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll 
b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
index 232014db9a012..a9503f77c3090 100644
--- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
+++ b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
@@ -2,22 +2,87 @@
 ; Verify whether the generated assembly for the following function includes 
the mtvsrbmi instruction.
 ; vector unsigned char v00FF()
 ; {
-; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
-; return x;
+;   vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
+;   return x;
+; }
+; vector unsigned short short00FF()
+; {
+;   vector unsigned short x = { 0xFF, 0,0,0, 0,0,0,0};
+;   return x;
+; }
+; vector unsigned int int00FF()
+; {
+;   vector unsigned int x = { 0xFF, 0,0,0};
+;   return x;
+; }
+; vector unsigned long long  longlong00FF()
+; {
+;   vector unsigned long long x = { 0xFF, 0};
+;   return x;
 ; }
 
 ; RUN: llc < %s -ppc-asm-full-reg-names  -mtriple=powerpc-ibm-aix -mcpu=pwr10  
-verify-machineinstrs \
-; RUN:   | FileCheck %s --check-prefix=CHECK
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+
+; RUN: llc < %s -ppc-asm-full-reg-names  
-mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr10  -verify-machineinstrs \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+
+; CHECK-NOT:   .byte   255
+; CHECK-NOT:   .byte   0
 
 define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
-; CHECK-NOT:  L..CPI0_0:
-; CHECK-NOT:   .byte   255 # 0xff
-; CHECK-NOT:   .byte   0   # 0x0
-
-; CHECK-LABEL: _Z5v00FFv:
-; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:mtvsrbmi v2, 1
-; CHECK-NEXT:blr
+; CHECK-BE-LABEL: _Z5v00FFv:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:mtvsrbmi v2, 32768
+; CHECK-BE-NEXT:blr
+;
+; CHECK-LE-LABEL: _Z5v00FFv:
+; CHECK-LE:   # %bb.0: # %entry
+; CHECK-LE-NEXT:mt

[llvm-branch-commits] [llvm] 2cf40a6 - [POWERPC] Fixes an error in the handling of the MTVSRBMI instruction for big-endian (#151565)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

Author: zhijian lin
Date: 2025-08-21T08:06:54+02:00
New Revision: 2cf40a6ca83a24fbb773992f68c51808a3ea2610

URL: 
https://github.com/llvm/llvm-project/commit/2cf40a6ca83a24fbb773992f68c51808a3ea2610
DIFF: 
https://github.com/llvm/llvm-project/commit/2cf40a6ca83a24fbb773992f68c51808a3ea2610.diff

LOG: [POWERPC] Fixes an error in the handling of the MTVSRBMI instruction for 
big-endian  (#151565)

The patch fixes a bug introduced by the patch [[PowerPC] using MTVSRBMI
instruction instead of constant pool in
power10+](https://github.com/llvm/llvm-project/pull/144084#top).

The issue arose because the layout of vector register elements differs
between little-endian and big-endian modes — specifically, the elements
appear in reverse order. This led to incorrect behavior when loading
constants using MTVSRBMI in big-endian configurations.

(cherry picked from commit 23b320311364f1bc1249500c7542d077d70098bf)

Added: 


Modified: 
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/mtvsrbmi.ll

Removed: 




diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f179873b4dbd2..67f59ed507f38 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9594,12 +9594,14 @@ static bool isValidSplatLoad(const PPCSubtarget 
&Subtarget, const SDValue &Op,
   return false;
 }
 
-bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN) {
+bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
+ bool IsLittleEndian) {
   assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
 
   BitMask.clearAllBits();
   EVT VT = BVN.getValueType(0);
-  APInt ConstValue(VT.getSizeInBits(), 0);
+  unsigned VTSize = VT.getSizeInBits();
+  APInt ConstValue(VTSize, 0);
 
   unsigned EltWidth = VT.getScalarSizeInBits();
 
@@ -9609,8 +9611,10 @@ bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode 
&BVN) {
 
 if (!CN)
   return false;
-
-ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos);
+// The elements in a vector register are ordered in reverse byte order
+// between little-endian and big-endian modes.
+ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
+  IsLittleEndian ? BitPos : VTSize - EltWidth - 
BitPos);
 BitPos += EltWidth;
   }
 
@@ -9641,7 +9645,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
 // we do not convert it to MTVSRBMI.
 // The xxleqv instruction sets a vector with all ones.
 // The xxlxor instruction sets a vector with all zeros.
-if (isValidMtVsrBmi(BitMask, *BVN) && BitMask != 0 && BitMask != 0x) {
+if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
+BitMask != 0 && BitMask != 0x) {
   SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
   MachineSDNode *MSDNode =
   DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);

diff  --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll 
b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
index 232014db9a012..a9503f77c3090 100644
--- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
+++ b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll
@@ -2,22 +2,87 @@
 ; Verify whether the generated assembly for the following function includes 
the mtvsrbmi instruction.
 ; vector unsigned char v00FF()
 ; {
-; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
-; return x;
+;   vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 };
+;   return x;
+; }
+; vector unsigned short short00FF()
+; {
+;   vector unsigned short x = { 0xFF, 0,0,0, 0,0,0,0};
+;   return x;
+; }
+; vector unsigned int int00FF()
+; {
+;   vector unsigned int x = { 0xFF, 0,0,0};
+;   return x;
+; }
+; vector unsigned long long  longlong00FF()
+; {
+;   vector unsigned long long x = { 0xFF, 0};
+;   return x;
 ; }
 
 ; RUN: llc < %s -ppc-asm-full-reg-names  -mtriple=powerpc-ibm-aix -mcpu=pwr10  
-verify-machineinstrs \
-; RUN:   | FileCheck %s --check-prefix=CHECK
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+
+; RUN: llc < %s -ppc-asm-full-reg-names  
-mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr10  -verify-machineinstrs \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+
+; CHECK-NOT:   .byte   255
+; CHECK-NOT:   .byte   0
 
 define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() {
-; CHECK-NOT:  L..CPI0_0:
-; CHECK-NOT:   .byte   255 # 0xff
-; CHECK-NOT:   .byte   0   # 0x0
-
-; CHECK-LABEL: _Z5v00FFv:
-; CHECK:   # %bb.0: # %entry
-; CHECK-NEXT:mtvsrbmi v2, 1
-; CHECK-NEXT:blr
+; CHECK-BE-LABEL: _Z5v00FFv:
+; CHECK-BE:   # %bb.0: # %entry
+; CHECK-BE-NEXT:mtvsrbmi v2, 32768
+; CHECK-BE-NEXT:blr
+;
+; CHECK-LE-LABEL: _Z5v00FFv:
+; CHECK-LE:   # %bb.0: # %entry
+; 

[llvm-branch-commits] [clang] release/21.x: [clang] Allow trivial pp-directives before C++ module directive (#153641) (PR #154077)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/154077
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang] Allow trivial pp-directives before C++ module directive (#153641) (PR #154077)

2025-08-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@yronglin (or anyone else). If you would like to add a note about this fix in 
the release notes (completely optional). Please reply to this comment with a 
one or two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/154077
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang] return type not correctly deduced for discarded lambdas (#153921) (PR #154080)

2025-08-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@ojhunt (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/154080
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [POWERPC] Fixes an error in the handling of the MTVSRBMI instruction for big-endian (#151565) (PR #154138)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/154138
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang][ObjC] Fix incorrect return type inference for discarded blocks (#154109) (PR #154210)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/154210

>From 7d6969bc6f86265ac938112b0d4d5f7dfea0510d Mon Sep 17 00:00:00 2001
From: Oliver Hunt 
Date: Mon, 18 Aug 2025 14:38:50 -0700
Subject: [PATCH] [clang][ObjC] Fix incorrect return type inference for
 discarded blocks (#154109)

When parsing a block expression we were not entering a new eval context
and as a result when parsing the block body we continued to treat any
return statements as discarded, so inferred a `void` result.

This fixes the problem by introducing an evaluation context around the
parsing of the body.

(cherry picked from commit ec4e6aaac4612af26322b2b10b8f518ecf053c74)
---
 clang/lib/Parse/ParseExpr.cpp |  3 ++-
 .../SemaObjCXX/discarded-block-type-inference.mm  | 15 +++
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/SemaObjCXX/discarded-block-type-inference.mm

diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index bc238a9517a37..3515343202de1 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -3342,7 +3342,8 @@ ExprResult Parser::ParseBlockLiteralExpression() {
 Actions.ActOnBlockError(CaretLoc, getCurScope());
 return ExprError();
   }
-
+  EnterExpressionEvaluationContextForFunction PotentiallyEvaluated(
+   Actions, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
   StmtResult Stmt(ParseCompoundStatementBody());
   BlockScope.Exit();
   if (!Stmt.isInvalid())
diff --git a/clang/test/SemaObjCXX/discarded-block-type-inference.mm 
b/clang/test/SemaObjCXX/discarded-block-type-inference.mm
new file mode 100644
index 0..8e2587724a7f6
--- /dev/null
+++ b/clang/test/SemaObjCXX/discarded-block-type-inference.mm
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fobjc-arc -fblocks %s
+
+void  block_receiver(int (^)() );
+
+int f1() {
+  if constexpr (0)
+(block_receiver)(^{ return 2; });
+  return 1;
+}
+
+int f2() {
+  if constexpr (0)
+return (^{ return 2; })();
+  return 1;
+}

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang][ObjC] Fix incorrect return type inference for discarded blocks (#154109) (PR #154210)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/154210
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang][PAC] ptrauth_qualifier and ptrauth_intrinsic should only be available on Darwin (#153912) (PR #154198)

2025-08-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@ojhunt (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/154198
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [POWERPC] Fixes an error in the handling of the MTVSRBMI instruction for big-endian (#151565) (PR #154138)

2025-08-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@amy-kwan (or anyone else). If you would like to add a note about this fix in 
the release notes (completely optional). Please reply to this comment with a 
one or two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/154138
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 7d6969b - [clang][ObjC] Fix incorrect return type inference for discarded blocks (#154109)

2025-08-20 Thread Tobias Hieta via llvm-branch-commits

Author: Oliver Hunt
Date: 2025-08-21T08:07:51+02:00
New Revision: 7d6969bc6f86265ac938112b0d4d5f7dfea0510d

URL: 
https://github.com/llvm/llvm-project/commit/7d6969bc6f86265ac938112b0d4d5f7dfea0510d
DIFF: 
https://github.com/llvm/llvm-project/commit/7d6969bc6f86265ac938112b0d4d5f7dfea0510d.diff

LOG: [clang][ObjC] Fix incorrect return type inference for discarded blocks 
(#154109)

When parsing a block expression we were not entering a new eval context
and as a result when parsing the block body we continued to treat any
return statements as discarded, so inferred a `void` result.

This fixes the problem by introducing an evaluation context around the
parsing of the body.

(cherry picked from commit ec4e6aaac4612af26322b2b10b8f518ecf053c74)

Added: 
clang/test/SemaObjCXX/discarded-block-type-inference.mm

Modified: 
clang/lib/Parse/ParseExpr.cpp

Removed: 




diff  --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp
index bc238a9517a37..3515343202de1 100644
--- a/clang/lib/Parse/ParseExpr.cpp
+++ b/clang/lib/Parse/ParseExpr.cpp
@@ -3342,7 +3342,8 @@ ExprResult Parser::ParseBlockLiteralExpression() {
 Actions.ActOnBlockError(CaretLoc, getCurScope());
 return ExprError();
   }
-
+  EnterExpressionEvaluationContextForFunction PotentiallyEvaluated(
+   Actions, Sema::ExpressionEvaluationContext::PotentiallyEvaluated);
   StmtResult Stmt(ParseCompoundStatementBody());
   BlockScope.Exit();
   if (!Stmt.isInvalid())

diff  --git a/clang/test/SemaObjCXX/discarded-block-type-inference.mm 
b/clang/test/SemaObjCXX/discarded-block-type-inference.mm
new file mode 100644
index 0..8e2587724a7f6
--- /dev/null
+++ b/clang/test/SemaObjCXX/discarded-block-type-inference.mm
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -std=c++23 -fsyntax-only -fobjc-arc -fblocks %s
+
+void  block_receiver(int (^)() );
+
+int f1() {
+  if constexpr (0)
+(block_receiver)(^{ return 2; });
+  return 1;
+}
+
+int f2() {
+  if constexpr (0)
+return (^{ return 2; })();
+  return 1;
+}



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [analyzer][docs] CSA release notes for clang-21 (PR #154600)

2025-08-20 Thread Balazs Benics via llvm-branch-commits

steakhal wrote:

I had limited time, so I used LLM to generate this. Exercise extra scrutiny 
during review.

There is an unrelated section about `Type-aware allocation and deallocation 
functions` that I'll move somewhere more appropriate later. The generated html 
would look like this then:
https://github.com/user-attachments/assets/dca80bd0-00ac-4243-b1e6-7a53614ff8e7";
 />


https://github.com/llvm/llvm-project/pull/154600
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [analyzer][docs] CSA release notes for clang-21 (PR #154600)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-static-analyzer-1

Author: Balazs Benics (steakhal)


Changes

The commits were gathered using:
```sh
git log --reverse --oneline llvmorg-20-init..llvm/main \
  clang/{lib/StaticAnalyzer,include/clang/StaticAnalyzer} | grep -v NFC | \
  grep -v OpenACC | grep -v -i revert | grep -v -i "webkit"
```

FYI, I also ignored Webkit changes because I assume they are fairly specific to 
them, and they likely already know what they ship xD.

I used the `LLVM_ENABLE_SPHINX=ON` and `LLVM_ENABLE_DOXYGEN=ON` cmake options 
to enable the `docs-clang-html` build target, which generates the html into 
`build/tools/clang/docs/html/ReleaseNotes.html` of which I attach the 
screenshots to let you judge if it looks all good or not.

---
Full diff: https://github.com/llvm/llvm-project/pull/154600.diff


1 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+85-5) 


``diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f4f7dd8342d92..a8fd4b174cf7c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1198,8 +1198,6 @@ Code Completion
 
 Static Analyzer
 ---
-- Fixed a crash when C++20 parenthesized initializer lists are used. This issue
-  was causing a crash in clang-tidy. (#GH136041)
 
 New features
 
@@ -1223,20 +1221,99 @@ New features
 - Implemented `P2719R5 Type-aware allocation and deallocation functions 
`_
   as an extension in all C++ language modes.
 
+- Added support for the ``[[clang::assume(cond)]]`` attribute, treating it as
+  ``__builtin_assume(cond)`` for better static analysis. (#GH129234)
+
+- Introduced per-entry-point statistics to provide more detailed analysis 
metrics.
+  Documentation: :doc:`analyzer/developer-docs/Statistics` (#GH131175)
+
+- Added time-trace scopes for high-level analyzer steps to improve performance
+  debugging. Documentation: 
:doc:`analyzer/developer-docs/PerformanceInvestigation`
+  (#GH125508, #GH125884)
+
+- Enhanced the ``check::BlockEntrance`` checker callback to provide more 
granular
+  control over block-level analysis.
+  `Documentation (check::BlockEntrance)
+  `_
+  (#GH140924)
+
+- Added a new experimental checker ``alpha.core.FixedAddressDereference`` to 
detect
+  dereferences of fixed addresses, which can be useful for finding hard-coded 
memory
+  accesses. (#GH127191)
 
 Crash and bug fixes
 ^^^
 
+- Fixed a crash when C++20 parenthesized initializer lists are used.
+  This affected a crash of the well-known lambda overloaded pattern.
+  (#GH136041, #GH135665)
+
+- Dropped an unjustified assertion, that was triggered in 
``BugReporterVisitors.cpp``
+  for variable initialization detection. (#GH125044)
+
 - Fixed a crash in ``UnixAPIMisuseChecker`` and ``MallocChecker`` when 
analyzing
   code with non-standard ``getline`` or ``getdelim`` function signatures. 
(#GH144884)
 
+- Fixed crashes involving ``__builtin_bit_cast``. (#GH139188)
+
+- ``__datasizeof`` (C++) and ``_Countof`` (C) no longer cause a failed 
assertion
+  when given an operand of VLA type. (#GH151711)
+
+- Fixed a crash in ``CastSizeChecker``. (#GH134387)
+
+- Some ``cplusplus.PlacementNew`` false positives were fixed. (#GH150161)
+
 Improvements
 
 
+- Added option to assume at least one iteration in loops to reduce false 
positives.
+  (#GH125494)
+
 - The checker option ``optin.cplusplus.VirtualCall:PureOnly`` was removed,
-  because it had been deprecated since 2019 and it is completely useless (it
-  was kept only for compatibility with pre-2019 versions, setting it to true is
-  equivalent to completely disabling the checker).
+  because it had been deprecated since 2019. (#GH131823)
+
+- Enhanced the ``StackAddrEscapeChecker`` to detect more cases of stack address
+  escapes, including return values for child stack frames. (#GH126620, 
#GH126986)
+
+- Improved the ``BlockInCriticalSectionChecker`` to recognize ``O_NONBLOCK``
+  streams and suppress reports in those cases. (#GH127049)
+
+- Better support for lambda-converted function pointers in analysis. 
(#GH144906)
+
+- Improved modeling of ``getcwd`` function in ``StdCLibraryFunctions`` checker.
+  (#GH141076)
+
+- Enhanced the ``EnumCastOutOfRange`` checker to ignore 
``[[clang::flag_enum]]``
+  enums. (#GH141232)
+
+- Improved handling of structured bindings captured by lambdas. (#GH132579, 
#GH91835)
+
+- Fixed unnamed bitfield handling in ``UninitializedObjectChecker``. 
(#GH132427, #GH132001)
+
+- Enhanced iterator checker modeling for ``insert`` operations. (#GH132596)
+
+- Improved ``format`` attribute handling in ``GenericTaintChecker``. 
(#GH132765)
+
+- Added support for ``consteval`` in ``ConditionBRVisitor::VisitTerminator``.
+  (#GH146859, #GH139130)
+
+- Enhanced handling of C standard streams in internal memory space. (#GH147766)
+
+- Enhanced store management with re

[llvm-branch-commits] [clang] [analyzer][docs] CSA release notes for clang-21 (PR #154600)

2025-08-20 Thread Balazs Benics via llvm-branch-commits

https://github.com/steakhal created 
https://github.com/llvm/llvm-project/pull/154600

The commits were gathered using:
```sh
git log --reverse --oneline llvmorg-20-init..llvm/main \
  clang/{lib/StaticAnalyzer,include/clang/StaticAnalyzer} | grep -v NFC | \
  grep -v OpenACC | grep -v -i revert | grep -v -i "webkit"
```

FYI, I also ignored Webkit changes because I assume it's fairly specific for 
them, and they likely already know what they ship xD.

I used the `LLVM_ENABLE_SPHINX=ON` and `LLVM_ENABLE_DOXYGEN=ON` cmake options 
to enable the `docs-clang-html` build target, which generates the html into 
`build/tools/clang/docs/html/ReleaseNotes.html` of which I attach the 
screenshots to let you judge if it looks all good or not.

>From 282a84dbcc57738398da024f021bcc057099edb3 Mon Sep 17 00:00:00 2001
From: Balazs Benics 
Date: Wed, 20 Aug 2025 21:40:26 +0200
Subject: [PATCH] [analyzer][docs] CSA release notes for clang-21

The commits were gathered using:
```sh
git log --reverse --oneline llvmorg-20-init..llvm/main \
  clang/{lib/StaticAnalyzer,include/clang/StaticAnalyzer} | grep -v NFC | \
  grep -v OpenACC | grep -v -i revert | grep -v -i "webkit"
```

FYI, I also ignored Webkit changes because I assume it's fairly specific
for them, and they likely already know what they ship xD.

I used the `LLVM_ENABLE_SPHINX=ON` and `LLVM_ENABLE_DOXYGEN=ON` cmake
options to enable the `docs-clang-html` build target, which generates
the html into `build/tools/clang/docs/html/ReleaseNotes.html` of which I
attach the screenshots to let you judge if it looks all good or not.
---
 clang/docs/ReleaseNotes.rst | 90 ++---
 1 file changed, 85 insertions(+), 5 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f4f7dd8342d92..a8fd4b174cf7c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1198,8 +1198,6 @@ Code Completion
 
 Static Analyzer
 ---
-- Fixed a crash when C++20 parenthesized initializer lists are used. This issue
-  was causing a crash in clang-tidy. (#GH136041)
 
 New features
 
@@ -1223,20 +1221,99 @@ New features
 - Implemented `P2719R5 Type-aware allocation and deallocation functions 
`_
   as an extension in all C++ language modes.
 
+- Added support for the ``[[clang::assume(cond)]]`` attribute, treating it as
+  ``__builtin_assume(cond)`` for better static analysis. (#GH129234)
+
+- Introduced per-entry-point statistics to provide more detailed analysis 
metrics.
+  Documentation: :doc:`analyzer/developer-docs/Statistics` (#GH131175)
+
+- Added time-trace scopes for high-level analyzer steps to improve performance
+  debugging. Documentation: 
:doc:`analyzer/developer-docs/PerformanceInvestigation`
+  (#GH125508, #GH125884)
+
+- Enhanced the ``check::BlockEntrance`` checker callback to provide more 
granular
+  control over block-level analysis.
+  `Documentation (check::BlockEntrance)
+  `_
+  (#GH140924)
+
+- Added a new experimental checker ``alpha.core.FixedAddressDereference`` to 
detect
+  dereferences of fixed addresses, which can be useful for finding hard-coded 
memory
+  accesses. (#GH127191)
 
 Crash and bug fixes
 ^^^
 
+- Fixed a crash when C++20 parenthesized initializer lists are used.
+  This affected a crash of the well-known lambda overloaded pattern.
+  (#GH136041, #GH135665)
+
+- Dropped an unjustified assertion that was triggered in 
``BugReporterVisitors.cpp``
+  for variable initialization detection. (#GH125044)
+
 - Fixed a crash in ``UnixAPIMisuseChecker`` and ``MallocChecker`` when 
analyzing
   code with non-standard ``getline`` or ``getdelim`` function signatures. 
(#GH144884)
 
+- Fixed crashes involving ``__builtin_bit_cast``. (#GH139188)
+
+- ``__datasizeof`` (C++) and ``_Countof`` (C) no longer cause a failed 
assertion
+  when given an operand of VLA type. (#GH151711)
+
+- Fixed a crash in ``CastSizeChecker``. (#GH134387)
+
+- Some ``cplusplus.PlacementNew`` false positives were fixed. (#GH150161)
+
 Improvements
 
 
+- Added option to assume at least one iteration in loops to reduce false 
positives.
+  (#GH125494)
+
 - The checker option ``optin.cplusplus.VirtualCall:PureOnly`` was removed,
-  because it had been deprecated since 2019 and it is completely useless (it
-  was kept only for compatibility with pre-2019 versions, setting it to true is
-  equivalent to completely disabling the checker).
+  because it had been deprecated since 2019. (#GH131823)
+
+- Enhanced the ``StackAddrEscapeChecker`` to detect more cases of stack address
+  escapes, including return values for child stack frames. (#GH126620, 
#GH126986)
+
+- Improved the ``BlockInCriticalSectionChecker`` to recognize ``O_NONBLOCK``
+  streams and suppress reports in those cases. (#GH127049)
+
+- Better support for lambda-converted function pointers in analysis. 

[llvm-branch-commits] [clang] [analyzer][docs] CSA release notes for clang-21 (PR #154600)

2025-08-20 Thread Balazs Benics via llvm-branch-commits

https://github.com/steakhal milestoned 
https://github.com/llvm/llvm-project/pull/154600
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [NFC][Clang][Docs] Update Pointer Authentication documentation (#152596) (PR #154240)

2025-08-20 Thread Oliver Hunt via llvm-branch-commits

ojhunt wrote:

> It probably makes sense to backport this to the release branch, but @ojhunt 
> would be much better to make that decision. @ojhunt : What do you think about 
> merging this back onto the LLVM 21 release branch?

@kbeyls I initiated this back port request :D :D :D

https://github.com/llvm/llvm-project/pull/154240
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Removing dxbc RootSignature and RootDescriptor from mcbxdc (PR #154585)

2025-08-20 Thread via llvm-branch-commits

https://github.com/joaosaffran created 
https://github.com/llvm/llvm-project/pull/154585

MC Root Signature Representations currently depend on Object structures. This 
PR removes that dependency in order to facilitate removing to_underlying 
usage in follow-up PRs.

>From f3ecd8ae8c4af29d23d8c82c6a9104b615c24473 Mon Sep 17 00:00:00 2001
From: Joao Saffran 
Date: Wed, 20 Aug 2025 10:15:37 -0700
Subject: [PATCH] removing dependency of Object

---
 .../llvm/MC/DXContainerRootSignature.h| 25 +--
 .../Frontend/HLSL/RootSignatureMetadata.cpp   |  6 ++---
 llvm/lib/MC/DXContainerRootSignature.cpp  |  4 +--
 llvm/lib/ObjectYAML/DXContainerEmitter.cpp|  4 +--
 .../DXILPostOptimizationValidation.cpp|  4 +--
 llvm/lib/Target/DirectX/DXILRootSignature.cpp |  4 +--
 6 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/llvm/include/llvm/MC/DXContainerRootSignature.h 
b/llvm/include/llvm/MC/DXContainerRootSignature.h
index 4db3f3458c808..85b45323cee08 100644
--- a/llvm/include/llvm/MC/DXContainerRootSignature.h
+++ b/llvm/include/llvm/MC/DXContainerRootSignature.h
@@ -19,6 +19,18 @@ namespace llvm {
 class raw_ostream;
 namespace mcdxbc {
 
+struct RootConstants {
+  uint32_t ShaderRegister;
+  uint32_t RegisterSpace;
+  uint32_t Num32BitValues;
+};
+
+struct RootDescriptor {
+  uint32_t ShaderRegister;
+  uint32_t RegisterSpace;
+  uint32_t Flags;
+};
+
 struct RootParameterInfo {
   dxbc::RootParameterType Type;
   dxbc::ShaderVisibility Visibility;
@@ -42,8 +54,8 @@ struct DescriptorTable {
 struct RootParametersContainer {
   SmallVector ParametersInfo;
 
-  SmallVector Constants;
-  SmallVector Descriptors;
+  SmallVector Constants;
+  SmallVector Descriptors;
   SmallVector Tables;
 
   void addInfo(dxbc::RootParameterType Type, dxbc::ShaderVisibility Visibility,
@@ -52,15 +64,14 @@ struct RootParametersContainer {
   }
 
   void addParameter(dxbc::RootParameterType Type,
-dxbc::ShaderVisibility Visibility,
-dxbc::RTS0::v1::RootConstants Constant) {
+dxbc::ShaderVisibility Visibility, RootConstants Constant) 
{
 addInfo(Type, Visibility, Constants.size());
 Constants.push_back(Constant);
   }
 
   void addParameter(dxbc::RootParameterType Type,
 dxbc::ShaderVisibility Visibility,
-dxbc::RTS0::v2::RootDescriptor Descriptor) {
+RootDescriptor Descriptor) {
 addInfo(Type, Visibility, Descriptors.size());
 Descriptors.push_back(Descriptor);
   }
@@ -76,11 +87,11 @@ struct RootParametersContainer {
 return Info;
   }
 
-  const dxbc::RTS0::v1::RootConstants &getConstant(size_t Index) const {
+  const RootConstants &getConstant(size_t Index) const {
 return Constants[Index];
   }
 
-  const dxbc::RTS0::v2::RootDescriptor &getRootDescriptor(size_t Index) const {
+  const RootDescriptor &getRootDescriptor(size_t Index) const {
 return Descriptors[Index];
   }
 
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp 
b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
index 610f889e8d7c6..770a6d1638e3c 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
@@ -240,7 +240,7 @@ Error 
MetadataParser::parseRootConstants(mcdxbc::RootSignatureDesc &RSD,
   if (auto E = Visibility.takeError())
 return Error(std::move(E));
 
-  dxbc::RTS0::v1::RootConstants Constants;
+  mcdxbc::RootConstants Constants;
   if (std::optional Val = extractMdIntValue(RootConstantNode, 2))
 Constants.ShaderRegister = *Val;
   else
@@ -294,7 +294,7 @@ Error MetadataParser::parseRootDescriptors(
   if (auto E = Visibility.takeError())
 return Error(std::move(E));
 
-  dxbc::RTS0::v2::RootDescriptor Descriptor;
+  mcdxbc::RootDescriptor Descriptor;
   if (std::optional Val = extractMdIntValue(RootDescriptorNode, 2))
 Descriptor.ShaderRegister = *Val;
   else
@@ -544,7 +544,7 @@ Error MetadataParser::validateRootSignature(
 case dxbc::RootParameterType::CBV:
 case dxbc::RootParameterType::UAV:
 case dxbc::RootParameterType::SRV: {
-  const dxbc::RTS0::v2::RootDescriptor &Descriptor =
+  const mcdxbc::RootDescriptor &Descriptor =
   RSD.ParametersContainer.getRootDescriptor(Info.Location);
   if (!hlsl::rootsig::verifyRegisterValue(Descriptor.ShaderRegister))
 DeferredErrs =
diff --git a/llvm/lib/MC/DXContainerRootSignature.cpp 
b/llvm/lib/MC/DXContainerRootSignature.cpp
index 14c9c8866bb24..94119d8e89ec9 100644
--- a/llvm/lib/MC/DXContainerRootSignature.cpp
+++ b/llvm/lib/MC/DXContainerRootSignature.cpp
@@ -97,7 +97,7 @@ void RootSignatureDesc::write(raw_ostream &OS) const {
 const auto Info = ParametersContainer.getInfo(I);
 switch (Info.Type) {
 case dxbc::RootParameterType::Constants32Bit: {
-  const dxbc::RTS0::v1::RootConstants &Constants =
+  const mcdxbc::RootConstants &Constants =
   ParametersCo

[llvm-branch-commits] [llvm] [DirectX] Removing dxbc RootSignature and RootDescriptor from mcbxdc (PR #154585)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-hlsl

Author: None (joaosaffran)


Changes

MC Root Signature Representations currently depend on Object structures. This 
PR removes that dependency in order to facilitate removing to_underlying 
usage in follow-up PRs.

---
Full diff: https://github.com/llvm/llvm-project/pull/154585.diff


6 Files Affected:

- (modified) llvm/include/llvm/MC/DXContainerRootSignature.h (+18-7) 
- (modified) llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp (+3-3) 
- (modified) llvm/lib/MC/DXContainerRootSignature.cpp (+2-2) 
- (modified) llvm/lib/ObjectYAML/DXContainerEmitter.cpp (+2-2) 
- (modified) llvm/lib/Target/DirectX/DXILPostOptimizationValidation.cpp (+2-2) 
- (modified) llvm/lib/Target/DirectX/DXILRootSignature.cpp (+2-2) 


``diff
diff --git a/llvm/include/llvm/MC/DXContainerRootSignature.h 
b/llvm/include/llvm/MC/DXContainerRootSignature.h
index 4db3f3458c808..85b45323cee08 100644
--- a/llvm/include/llvm/MC/DXContainerRootSignature.h
+++ b/llvm/include/llvm/MC/DXContainerRootSignature.h
@@ -19,6 +19,18 @@ namespace llvm {
 class raw_ostream;
 namespace mcdxbc {
 
+struct RootConstants {
+  uint32_t ShaderRegister;
+  uint32_t RegisterSpace;
+  uint32_t Num32BitValues;
+};
+
+struct RootDescriptor {
+  uint32_t ShaderRegister;
+  uint32_t RegisterSpace;
+  uint32_t Flags;
+};
+
 struct RootParameterInfo {
   dxbc::RootParameterType Type;
   dxbc::ShaderVisibility Visibility;
@@ -42,8 +54,8 @@ struct DescriptorTable {
 struct RootParametersContainer {
   SmallVector ParametersInfo;
 
-  SmallVector Constants;
-  SmallVector Descriptors;
+  SmallVector Constants;
+  SmallVector Descriptors;
   SmallVector Tables;
 
   void addInfo(dxbc::RootParameterType Type, dxbc::ShaderVisibility Visibility,
@@ -52,15 +64,14 @@ struct RootParametersContainer {
   }
 
   void addParameter(dxbc::RootParameterType Type,
-dxbc::ShaderVisibility Visibility,
-dxbc::RTS0::v1::RootConstants Constant) {
+dxbc::ShaderVisibility Visibility, RootConstants Constant) 
{
 addInfo(Type, Visibility, Constants.size());
 Constants.push_back(Constant);
   }
 
   void addParameter(dxbc::RootParameterType Type,
 dxbc::ShaderVisibility Visibility,
-dxbc::RTS0::v2::RootDescriptor Descriptor) {
+RootDescriptor Descriptor) {
 addInfo(Type, Visibility, Descriptors.size());
 Descriptors.push_back(Descriptor);
   }
@@ -76,11 +87,11 @@ struct RootParametersContainer {
 return Info;
   }
 
-  const dxbc::RTS0::v1::RootConstants &getConstant(size_t Index) const {
+  const RootConstants &getConstant(size_t Index) const {
 return Constants[Index];
   }
 
-  const dxbc::RTS0::v2::RootDescriptor &getRootDescriptor(size_t Index) const {
+  const RootDescriptor &getRootDescriptor(size_t Index) const {
 return Descriptors[Index];
   }
 
diff --git a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp 
b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
index 610f889e8d7c6..770a6d1638e3c 100644
--- a/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
+++ b/llvm/lib/Frontend/HLSL/RootSignatureMetadata.cpp
@@ -240,7 +240,7 @@ Error 
MetadataParser::parseRootConstants(mcdxbc::RootSignatureDesc &RSD,
   if (auto E = Visibility.takeError())
 return Error(std::move(E));
 
-  dxbc::RTS0::v1::RootConstants Constants;
+  mcdxbc::RootConstants Constants;
   if (std::optional Val = extractMdIntValue(RootConstantNode, 2))
 Constants.ShaderRegister = *Val;
   else
@@ -294,7 +294,7 @@ Error MetadataParser::parseRootDescriptors(
   if (auto E = Visibility.takeError())
 return Error(std::move(E));
 
-  dxbc::RTS0::v2::RootDescriptor Descriptor;
+  mcdxbc::RootDescriptor Descriptor;
   if (std::optional Val = extractMdIntValue(RootDescriptorNode, 2))
 Descriptor.ShaderRegister = *Val;
   else
@@ -544,7 +544,7 @@ Error MetadataParser::validateRootSignature(
 case dxbc::RootParameterType::CBV:
 case dxbc::RootParameterType::UAV:
 case dxbc::RootParameterType::SRV: {
-  const dxbc::RTS0::v2::RootDescriptor &Descriptor =
+  const mcdxbc::RootDescriptor &Descriptor =
   RSD.ParametersContainer.getRootDescriptor(Info.Location);
   if (!hlsl::rootsig::verifyRegisterValue(Descriptor.ShaderRegister))
 DeferredErrs =
diff --git a/llvm/lib/MC/DXContainerRootSignature.cpp 
b/llvm/lib/MC/DXContainerRootSignature.cpp
index 14c9c8866bb24..94119d8e89ec9 100644
--- a/llvm/lib/MC/DXContainerRootSignature.cpp
+++ b/llvm/lib/MC/DXContainerRootSignature.cpp
@@ -97,7 +97,7 @@ void RootSignatureDesc::write(raw_ostream &OS) const {
 const auto Info = ParametersContainer.getInfo(I);
 switch (Info.Type) {
 case dxbc::RootParameterType::Constants32Bit: {
-  const dxbc::RTS0::v1::RootConstants &Constants =
+  const mcdxbc::RootConstants &Constants =
   ParametersContainer.getConstant(Info.Location);
   support::endian::write(BOS, Cons

[llvm-branch-commits] [mlir] MLIR bug fixes for LLVM 21.x release (PR #154587)

2025-08-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

⚠️ We detected that you are using a GitHub private e-mail address to contribute 
to the repo. Please turn off [Keep my email addresses 
private](https://github.com/settings/emails) setting in your account. See 
[LLVM Developer 
Policy](https://llvm.org/docs/DeveloperPolicy.html#email-addresses) and [LLVM 
Discourse](https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it)
 for more information.

https://github.com/llvm/llvm-project/pull/154587
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] MLIR bug fixes for LLVM 21.x release (PR #154587)

2025-08-20 Thread Mehdi Amini via llvm-branch-commits

joker-eph wrote:

@tru : I haven't done release cherry-pick for a while, so I'm not sure about 
the current process to get this merged, is this on you? Do I need to ping you 
or are you going through these PRs on a regular basis?

https://github.com/llvm/llvm-project/pull/154587
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Move OpenACC Release Note- (PR #154605)

2025-08-20 Thread Balazs Benics via llvm-branch-commits

steakhal wrote:

I hope I can get the other two entries also to their right places, but that's 
not your job.

https://github.com/llvm/llvm-project/pull/154605
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Move rest of documentation problems that found their way to the SA sec. (PR #154608)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Erich Keane (erichkeane)


Changes

It was brought up in response to #154605 that these two were in the 
wrong place as well!  This patch tries to find better places for them,
  and moves them.

---
Full diff: https://github.com/llvm/llvm-project/pull/154608.diff


1 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+7-20) 


``diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f4f7dd8342d92..0745c6117cbea 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -123,6 +123,8 @@ C++ Language Changes
   a perfect match (all conversion sequences are identity conversions) template 
candidates are not instantiated.
   Diagnostics that would have resulted from the instantiation of these 
template candidates are no longer
   produced. This aligns Clang closer to the behavior of GCC, and fixes 
(#GH62096), (#GH74581), and (#GH74581).
+- Implemented `P2719R5 Type-aware allocation and deallocation functions 
`_
+  as an extension in all C++ language modes.
 
 C++2c Feature Support
 ^
@@ -378,6 +380,11 @@ New Compiler Flags
 
 - New options ``-fthinlto-distributor=`` and ``-Xthinlto-distributor=`` added 
for Integrated Distributed ThinLTO (DTLTO). DTLTO enables the distribution of 
backend ThinLTO compilations via external distribution systems, such as 
Incredibuild, during the traditional link step. (#GH147265, `ThinLTODocs 
`_).
 
+- A new flag - `-static-libclosure` was introduced to support statically 
linking
+  the runtime for the Blocks extension on Windows. This flag currently only
+  changes the code generation, and even then, only on Windows. This does not
+  impact the linker behaviour like the other `-static-*` flags.
+
 Deprecated Compiler Flags
 -
 
@@ -1204,26 +1211,6 @@ Static Analyzer
 New features
 
 
-- A new flag - `-static-libclosure` was introduced to support statically 
linking
-  the runtime for the Blocks extension on Windows. This flag currently only
-  changes the code generation, and even then, only on Windows. This does not
-  impact the linker behaviour like the other `-static-*` flags.
-- OpenACC support, enabled via `-fopenacc` has reached a level of completeness
-  to finally be at least notionally usable. Currently, the OpenACC 3.4
-  specification has been completely implemented for Sema and AST creation, so
-  nodes will show up in the AST after having been properly checked. Lowering is
-  currently a work in progress, with compute, loop, and combined constructs
-  partially implemented, plus a handful of data and executable constructs
-  implemented. Lowering will only work in Clang-IR mode (so only with a 
compiler
-  built with Clang-IR enabled, and with `-fclangir` used on the command line).
-  However, note that the Clang-IR implementation status is also quite partial,
-  so frequent 'not yet implemented' diagnostics should be expected.  Also, the
-  ACC MLIR dialect does not currently implement any lowering to LLVM-IR, so no
-  code generation is possible for OpenACC.
-- Implemented `P2719R5 Type-aware allocation and deallocation functions 
`_
-  as an extension in all C++ language modes.
-
-
 Crash and bug fixes
 ^^^
 

``




https://github.com/llvm/llvm-project/pull/154608
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Move rest of documentation problems that found their way to the SA sec. (PR #154608)

2025-08-20 Thread Balazs Benics via llvm-branch-commits

steakhal wrote:

Thank you for going the extra mile.
I'll have a look tomorrow!

https://github.com/llvm/llvm-project/pull/154608
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [X86] SimplifyDemandedVectorEltsForTargetNode - don't split X86ISD::CVTTP2UI nodes without AVX512VL (#154504) (PR #154526)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: None (llvmbot)


Changes

Backport d770567a514716cdb250a2dee635435c22622e34

Requested by: @nikic

---
Full diff: https://github.com/llvm/llvm-project/pull/154526.diff


2 Files Affected:

- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+5-1) 
- (added) llvm/test/CodeGen/X86/pr154492.ll (+20) 


``diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c7839baf7de8e..85e5ebc385c68 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44178,8 +44178,12 @@ bool 
X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
 }
   // Conversions.
   // TODO: Add more CVT opcodes when we have test coverage.
-case X86ISD::CVTTP2SI:
 case X86ISD::CVTTP2UI: {
+  if (!Subtarget.hasVLX())
+break;
+  [[fallthrough]];
+}
+case X86ISD::CVTTP2SI: {
   if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f16 &&
   !Subtarget.hasVLX())
 break;
diff --git a/llvm/test/CodeGen/X86/pr154492.ll 
b/llvm/test/CodeGen/X86/pr154492.ll
new file mode 100644
index 0..1ba17594976e1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr154492.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f  | FileCheck %s 
--check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s 
--check-prefix=AVX512VL
+
+define <16 x i32> @PR154492() {
+; AVX512F-LABEL: PR154492:
+; AVX512F:   # %bb.0:
+; AVX512F-NEXT:vxorps %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT:vcvttps2udq %zmm0, %zmm0
+; AVX512F-NEXT:vmovaps %ymm0, %ymm0
+; AVX512F-NEXT:retq
+;
+; AVX512VL-LABEL: PR154492:
+; AVX512VL:   # %bb.0:
+; AVX512VL-NEXT:vxorps %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT:vcvttps2udq %ymm0, %ymm0
+; AVX512VL-NEXT:retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> 
zeroinitializer, <16 x i32> zeroinitializer, i16 255, i32 4)
+  ret <16 x i32> %res
+}

``




https://github.com/llvm/llvm-project/pull/154526
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [X86] SimplifyDemandedVectorEltsForTargetNode - don't split X86ISD::CVTTP2UI nodes without AVX512VL (#154504) (PR #154526)

2025-08-20 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/154526
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [X86] SimplifyDemandedVectorEltsForTargetNode - don't split X86ISD::CVTTP2UI nodes without AVX512VL (#154504) (PR #154526)

2025-08-20 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/154526

Backport d770567a514716cdb250a2dee635435c22622e34

Requested by: @nikic

>From 0cf566fd6434fcd52a36ded92b4bfdcde6b9681d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim 
Date: Wed, 20 Aug 2025 12:18:10 +0100
Subject: [PATCH] [X86] SimplifyDemandedVectorEltsForTargetNode - don't split
 X86ISD::CVTTP2UI nodes without AVX512VL (#154504)

Unlike CVTTP2SI, CVTTP2UI is only available on AVX512 targets, so we
don't fallback to the AVX1 variant when we split a 512-bit vector, so we
can only use the 128/256-bit variants if we have AVX512VL.

Fixes #154492

(cherry picked from commit d770567a514716cdb250a2dee635435c22622e34)
---
 llvm/lib/Target/X86/X86ISelLowering.cpp |  6 +-
 llvm/test/CodeGen/X86/pr154492.ll   | 20 
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/pr154492.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c7839baf7de8e..85e5ebc385c68 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44178,8 +44178,12 @@ bool 
X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
 }
   // Conversions.
   // TODO: Add more CVT opcodes when we have test coverage.
-case X86ISD::CVTTP2SI:
 case X86ISD::CVTTP2UI: {
+  if (!Subtarget.hasVLX())
+break;
+  [[fallthrough]];
+}
+case X86ISD::CVTTP2SI: {
   if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f16 &&
   !Subtarget.hasVLX())
 break;
diff --git a/llvm/test/CodeGen/X86/pr154492.ll 
b/llvm/test/CodeGen/X86/pr154492.ll
new file mode 100644
index 0..1ba17594976e1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr154492.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f  | FileCheck %s 
--check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s 
--check-prefix=AVX512VL
+
+define <16 x i32> @PR154492() {
+; AVX512F-LABEL: PR154492:
+; AVX512F:   # %bb.0:
+; AVX512F-NEXT:vxorps %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT:vcvttps2udq %zmm0, %zmm0
+; AVX512F-NEXT:vmovaps %ymm0, %ymm0
+; AVX512F-NEXT:retq
+;
+; AVX512VL-LABEL: PR154492:
+; AVX512VL:   # %bb.0:
+; AVX512VL-NEXT:vxorps %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT:vcvttps2udq %ymm0, %ymm0
+; AVX512VL-NEXT:retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> 
zeroinitializer, <16 x i32> zeroinitializer, i16 255, i32 4)
+  ret <16 x i32> %res
+}

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [X86] SimplifyDemandedVectorEltsForTargetNode - don't split X86ISD::CVTTP2UI nodes without AVX512VL (#154504) (PR #154526)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:

@nikic What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/154526
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 2 (PR #151309)

2025-08-20 Thread Matt Arsenault via llvm-branch-commits


@@ -5475,6 +5550,47 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr 
&MI,
  .addReg(Accumulator->getOperand(0).getReg());
 break;
   }
+  case AMDGPU::S_ADD_U64_PSEUDO:

arsenm wrote:

This is nearly identical to the existing expansion, need to not duplicate so 
much 

https://github.com/llvm/llvm-project/pull/151309
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Start using AV_MOV_B64_IMM_PSEUDO (PR #154500)

2025-08-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/154500

>From f8e02ce702188b6999cab8107edebad9f75b1ca5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 20 Aug 2025 18:21:20 +0900
Subject: [PATCH] AMDGPU: Start using AV_MOV_B64_IMM_PSEUDO

---
 .../Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp  |  22 +-
 .../AMDGPU/amdgpu-prepare-agpr-alloc.mir  |  53 +-
 .../AMDGPU/av-split-dead-valno-crash.ll   |  52 +-
 .../AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll|  16 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll | 578 +-
 ...m.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll |   8 +-
 6 files changed, 388 insertions(+), 341 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
index 3b06e9b00ac69..0137b3f5943d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
@@ -34,6 +34,8 @@ class AMDGPUPrepareAGPRAllocImpl {
   const SIInstrInfo &TII;
   MachineRegisterInfo &MRI;
 
+  bool isAV64Imm(const MachineOperand &MO) const;
+
 public:
   AMDGPUPrepareAGPRAllocImpl(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
   : TII(*ST.getInstrInfo()), MRI(MRI) {}
@@ -85,11 +87,16 @@ AMDGPUPrepareAGPRAllocPass::run(MachineFunction &MF,
   return PreservedAnalyses::all();
 }
 
+bool AMDGPUPrepareAGPRAllocImpl::isAV64Imm(const MachineOperand &MO) const {
+  return MO.isImm() && TII.isLegalAV64PseudoImm(MO.getImm());
+}
+
 bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) {
   if (MRI.isReserved(AMDGPU::AGPR0))
 return false;
 
-  const MCInstrDesc &AVImmPseudo = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO);
+  const MCInstrDesc &AVImmPseudo32 = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO);
+  const MCInstrDesc &AVImmPseudo64 = TII.get(AMDGPU::AV_MOV_B64_IMM_PSEUDO);
 
   bool Changed = false;
   for (MachineBasicBlock &MBB : MF) {
@@ -98,8 +105,19 @@ bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) {
TII.isInlineConstant(MI, 1)) ||
   (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
MI.getOperand(1).isImm())) {
-MI.setDesc(AVImmPseudo);
+MI.setDesc(AVImmPseudo32);
+Changed = true;
+continue;
+  }
+
+  // TODO: If only half of the value is rewritable, is it worth splitting 
it
+  // up?
+  if ((MI.getOpcode() == AMDGPU::V_MOV_B64_e64 ||
+   MI.getOpcode() == AMDGPU::V_MOV_B64_PSEUDO) &&
+  isAV64Imm(MI.getOperand(1))) {
+MI.setDesc(AVImmPseudo64);
 Changed = true;
+continue;
   }
 }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir 
b/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
index d277c8104fe44..aaacf1d6f793b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
@@ -114,22 +114,22 @@ body: |
   ; HAS-AGPR-NEXT:   liveins: $vgpr0_vgpr1
   ; HAS-AGPR-NEXT: {{  $}}
   ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 
$vgpr0_vgpr1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 54, 
implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 64, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_3:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 %stack.0, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
65, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
279172874240, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
279172874305, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
4290672329938, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
-9223372036854775808, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
1042479491, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B7:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
4477415320595726336, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B8:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
$vgpr0_vgpr1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B9:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
%stack.0, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 54, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_1:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 1, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_2:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 64, implicit $exec
+  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 %stack.0, implicit $exec
+  ; HAS-AGPR-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_M

[llvm-branch-commits] [llvm] AMDGPU: Start using AV_MOV_B64_IMM_PSEUDO (PR #154500)

2025-08-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/154500

>From f8e02ce702188b6999cab8107edebad9f75b1ca5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 20 Aug 2025 18:21:20 +0900
Subject: [PATCH] AMDGPU: Start using AV_MOV_B64_IMM_PSEUDO

---
 .../Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp  |  22 +-
 .../AMDGPU/amdgpu-prepare-agpr-alloc.mir  |  53 +-
 .../AMDGPU/av-split-dead-valno-crash.ll   |  52 +-
 .../AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll|  16 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll | 578 +-
 ...m.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll |   8 +-
 6 files changed, 388 insertions(+), 341 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
index 3b06e9b00ac69..0137b3f5943d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
@@ -34,6 +34,8 @@ class AMDGPUPrepareAGPRAllocImpl {
   const SIInstrInfo &TII;
   MachineRegisterInfo &MRI;
 
+  bool isAV64Imm(const MachineOperand &MO) const;
+
 public:
   AMDGPUPrepareAGPRAllocImpl(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
   : TII(*ST.getInstrInfo()), MRI(MRI) {}
@@ -85,11 +87,16 @@ AMDGPUPrepareAGPRAllocPass::run(MachineFunction &MF,
   return PreservedAnalyses::all();
 }
 
+bool AMDGPUPrepareAGPRAllocImpl::isAV64Imm(const MachineOperand &MO) const {
+  return MO.isImm() && TII.isLegalAV64PseudoImm(MO.getImm());
+}
+
 bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) {
   if (MRI.isReserved(AMDGPU::AGPR0))
 return false;
 
-  const MCInstrDesc &AVImmPseudo = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO);
+  const MCInstrDesc &AVImmPseudo32 = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO);
+  const MCInstrDesc &AVImmPseudo64 = TII.get(AMDGPU::AV_MOV_B64_IMM_PSEUDO);
 
   bool Changed = false;
   for (MachineBasicBlock &MBB : MF) {
@@ -98,8 +105,19 @@ bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) {
TII.isInlineConstant(MI, 1)) ||
   (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
MI.getOperand(1).isImm())) {
-MI.setDesc(AVImmPseudo);
+MI.setDesc(AVImmPseudo32);
+Changed = true;
+continue;
+  }
+
+  // TODO: If only half of the value is rewritable, is it worth splitting 
it
+  // up?
+  if ((MI.getOpcode() == AMDGPU::V_MOV_B64_e64 ||
+   MI.getOpcode() == AMDGPU::V_MOV_B64_PSEUDO) &&
+  isAV64Imm(MI.getOperand(1))) {
+MI.setDesc(AVImmPseudo64);
 Changed = true;
+continue;
   }
 }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir 
b/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
index d277c8104fe44..aaacf1d6f793b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
@@ -114,22 +114,22 @@ body: |
   ; HAS-AGPR-NEXT:   liveins: $vgpr0_vgpr1
   ; HAS-AGPR-NEXT: {{  $}}
   ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 
$vgpr0_vgpr1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 54, 
implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 64, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_3:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 %stack.0, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
65, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
279172874240, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
279172874305, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
4290672329938, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
-9223372036854775808, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
1042479491, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B7:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
4477415320595726336, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B8:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
$vgpr0_vgpr1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B9:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
%stack.0, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 54, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_1:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 1, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_2:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 64, implicit $exec
+  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 %stack.0, implicit $exec
+  ; HAS-AGPR-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_M

[llvm-branch-commits] [llvm] release/21.x: [TailDup] Delay aggressive computed-goto taildup to after RegAlloc. (#150911) (PR #151680)

2025-08-20 Thread Florian Hahn via llvm-branch-commits

fhahn wrote:

> > Ping :)
> > If the X86 side of things is a blocker, we can also make it only to apply 
> > to AArch64 or Apple platforms?
> 
> Yeah, that would be good to me.

Sounds good. I updated the PR to restrict this just to Apple platforms for now, 
for which we did careful perf evaluations.

https://github.com/llvm/llvm-project/pull/151680
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LoongArch] Broadcast repeated subsequence in build_vector instead of inserting per element (PR #154533)

2025-08-20 Thread via llvm-branch-commits

https://github.com/zhaoqi5 created 
https://github.com/llvm/llvm-project/pull/154533

None

>From 3674bad63bffc351ecd099baef91f90b8d1a0866 Mon Sep 17 00:00:00 2001
From: Qi Zhao 
Date: Wed, 20 Aug 2025 20:39:50 +0800
Subject: [PATCH 1/2] [LoongArch] Broadcast repeated subsequence in
 build_vector instead of inserting per element

---
 .../LoongArch/LoongArchISelLowering.cpp   | 53 +++
 .../Target/LoongArch/LoongArchISelLowering.h  |  2 +
 .../LoongArch/LoongArchLASXInstrInfo.td   | 22 +++-
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5b2d185594f44..de2a27143c389 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2434,6 +2434,7 @@ static SDValue 
lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
   BuildVectorSDNode *Node = cast(Op);
+  MVT VT = Node->getSimpleValueType(0);
   EVT ResTy = Op->getValueType(0);
   unsigned NumElts = ResTy.getVectorNumElements();
   SDLoc DL(Op);
@@ -2517,6 +2518,56 @@ SDValue 
LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
   }
 
   if (!IsConstant) {
+// If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to 
fill
+// the sub-sequence of the vector and then broadcast the sub-sequence.
+SmallVector Sequence;
+BitVector UndefElements;
+if (Node->getRepeatedSequence(Sequence, &UndefElements)) {
+  // TODO: If the BUILD_VECTOR contains undef elements, consider falling
+  // back to use INSERT_VECTOR_ELT to materialize the vector, because it
+  // generates worse code in some cases. This could be further optimized
+  // with more consideration.
+  if (UndefElements.count() == 0) {
+unsigned SeqLen = Sequence.size();
+
+SDValue Op0 = Sequence[0];
+SDValue Vector = DAG.getUNDEF(ResTy);
+if (!Op0.isUndef())
+  Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
+for (unsigned i = 1; i < SeqLen; ++i) {
+  SDValue Opi = Sequence[i];
+  if (Opi.isUndef())
+continue;
+  Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
+   DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+}
+
+unsigned SplatLen = NumElts / SeqLen;
+MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
+MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
+
+// If size of the sub-sequence is half of a 256-bits vector, bitcast 
the
+// vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
+if (SplatEltTy == MVT::i128)
+  SplatTy = MVT::v4i64;
+
+SDValue SrcVec = DAG.getBitcast(SplatTy, Vector);
+SDValue SplatVec;
+if (SplatTy.is256BitVector()) {
+  SplatVec =
+  DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
+: LoongArchISD::XVREPLVE0,
+  DL, SplatTy, SrcVec);
+} else {
+  SplatVec =
+  DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
+  DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+}
+
+return DAG.getBitcast(ResTy, SplatVec);
+  }
+}
+
 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
 // The resulting code is the same length as the expansion, but it doesn't
 // use memory operations.
@@ -6637,6 +6688,8 @@ const char 
*LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
 NODE_NAME_CASE(VREPLVEI)
 NODE_NAME_CASE(VREPLGR2VR)
 NODE_NAME_CASE(XVPERMI)
+NODE_NAME_CASE(XVREPLVE0)
+NODE_NAME_CASE(XVREPLVE0Q)
 NODE_NAME_CASE(VPICK_SEXT_ELT)
 NODE_NAME_CASE(VPICK_ZEXT_ELT)
 NODE_NAME_CASE(VREPLVE)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index f79ba7450cc36..9ab867a918f4e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -141,6 +141,8 @@ enum NodeType : unsigned {
   VREPLVEI,
   VREPLGR2VR,
   XVPERMI,
+  XVREPLVE0,
+  XVREPLVE0Q,
 
   // Extended vector element extraction
   VPICK_SEXT_ELT,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 0696b11d62ac9..962448fcb470d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -10,8 +10,13 @@
 //
 
//===--===//
 
+def SDT_LoongArchXVREPLVE0 : SDTypeProfile<1, 

[llvm-branch-commits] [llvm] [LoongArch] Broadcast repeated subsequence in build_vector instead of inserting per element (PR #154533)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)


Changes



---

Patch is 43.56 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/154533.diff


5 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+53) 
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+2) 
- (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+21-1) 
- (modified) llvm/test/CodeGen/LoongArch/lasx/build-vector.ll (+132-542) 
- (modified) llvm/test/CodeGen/LoongArch/lsx/build-vector.ll (+9-51) 


``diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 5b2d185594f44..de2a27143c389 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2434,6 +2434,7 @@ static SDValue 
lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
   BuildVectorSDNode *Node = cast(Op);
+  MVT VT = Node->getSimpleValueType(0);
   EVT ResTy = Op->getValueType(0);
   unsigned NumElts = ResTy.getVectorNumElements();
   SDLoc DL(Op);
@@ -2517,6 +2518,56 @@ SDValue 
LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
   }
 
   if (!IsConstant) {
+// If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to 
fill
+// the sub-sequence of the vector and then broadcast the sub-sequence.
+SmallVector Sequence;
+BitVector UndefElements;
+if (Node->getRepeatedSequence(Sequence, &UndefElements)) {
+  // TODO: If the BUILD_VECTOR contains undef elements, consider falling
+  // back to use INSERT_VECTOR_ELT to materialize the vector, because it
+  // generates worse code in some cases. This could be further optimized
+  // with more consideration.
+  if (UndefElements.count() == 0) {
+unsigned SeqLen = Sequence.size();
+
+SDValue Op0 = Sequence[0];
+SDValue Vector = DAG.getUNDEF(ResTy);
+if (!Op0.isUndef())
+  Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
+for (unsigned i = 1; i < SeqLen; ++i) {
+  SDValue Opi = Sequence[i];
+  if (Opi.isUndef())
+continue;
+  Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
+   DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+}
+
+unsigned SplatLen = NumElts / SeqLen;
+MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
+MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
+
+// If size of the sub-sequence is half of a 256-bits vector, bitcast 
the
+// vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
+if (SplatEltTy == MVT::i128)
+  SplatTy = MVT::v4i64;
+
+SDValue SrcVec = DAG.getBitcast(SplatTy, Vector);
+SDValue SplatVec;
+if (SplatTy.is256BitVector()) {
+  SplatVec =
+  DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
+: LoongArchISD::XVREPLVE0,
+  DL, SplatTy, SrcVec);
+} else {
+  SplatVec =
+  DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
+  DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+}
+
+return DAG.getBitcast(ResTy, SplatVec);
+  }
+}
+
 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
 // The resulting code is the same length as the expansion, but it doesn't
 // use memory operations.
@@ -6637,6 +6688,8 @@ const char 
*LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
 NODE_NAME_CASE(VREPLVEI)
 NODE_NAME_CASE(VREPLGR2VR)
 NODE_NAME_CASE(XVPERMI)
+NODE_NAME_CASE(XVREPLVE0)
+NODE_NAME_CASE(XVREPLVE0Q)
 NODE_NAME_CASE(VPICK_SEXT_ELT)
 NODE_NAME_CASE(VPICK_ZEXT_ELT)
 NODE_NAME_CASE(VREPLVE)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index f79ba7450cc36..9ab867a918f4e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -141,6 +141,8 @@ enum NodeType : unsigned {
   VREPLVEI,
   VREPLGR2VR,
   XVPERMI,
+  XVREPLVE0,
+  XVREPLVE0Q,
 
   // Extended vector element extraction
   VPICK_SEXT_ELT,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 0696b11d62ac9..962448fcb470d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -10,8 +10,13 @@
 //
 
//===--===//
 
+de

[llvm-branch-commits] [flang] [flang][OpenMP] Semantic checks for DYN_GROUPPRIVATE (PR #154550)

2025-08-20 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz created 
https://github.com/llvm/llvm-project/pull/154550

Add checks for non-STRICT values of the prescriptiveness modifier on clauses 
that had accepted it prior to the addition of the FALLBACK value (GRAINSIZE and 
NUM_TASKS).

>From e00ef602a8fb143d963ec2ec4264f96544929a34 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Thu, 14 Aug 2025 13:26:23 -0500
Subject: [PATCH] [flang][OpenMP] Semantic checks for DYN_GROUPPRIVATE

Add checks for non-STRICT values of the prescriptiveness modifier on
clauses that had accepted it prior to the addition of FALLBACK value
(GRAINSIZE and NUM_TASKS).
---
 .../flang/Semantics/openmp-modifiers.h|  1 +
 flang/lib/Semantics/check-omp-structure.cpp   | 68 ++-
 flang/lib/Semantics/openmp-modifiers.cpp  | 19 ++
 .../OpenMP/prescriptiveness-modifier.f90  | 47 +
 4 files changed, 132 insertions(+), 3 deletions(-)
 create mode 100644 flang/test/Semantics/OpenMP/prescriptiveness-modifier.f90

diff --git a/flang/include/flang/Semantics/openmp-modifiers.h 
b/flang/include/flang/Semantics/openmp-modifiers.h
index e0eae984731c7..f74e58a94f768 100644
--- a/flang/include/flang/Semantics/openmp-modifiers.h
+++ b/flang/include/flang/Semantics/openmp-modifiers.h
@@ -67,6 +67,7 @@ template  const OmpModifierDescriptor 
&OmpGetDescriptor();
 #define DECLARE_DESCRIPTOR(name) \
   template <> const OmpModifierDescriptor &OmpGetDescriptor()
 
+DECLARE_DESCRIPTOR(parser::OmpAccessGroup);
 DECLARE_DESCRIPTOR(parser::OmpAlignment);
 DECLARE_DESCRIPTOR(parser::OmpAlignModifier);
 DECLARE_DESCRIPTOR(parser::OmpAllocatorComplexModifier);
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index 2b36b085ae08d..e83a3616d6965 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -471,6 +471,45 @@ void OmpStructureChecker::Enter(const 
parser::OmpClause::Hint &x) {
   }
 }
 
+void OmpStructureChecker::Enter(const parser::OmpClause::DynGroupprivate &x) {
+  CheckAllowedClause(llvm::omp::Clause::OMPC_dyn_groupprivate);
+  parser::CharBlock source{GetContext().clauseSource};
+
+  OmpVerifyModifiers(x.v, llvm::omp::OMPC_dyn_groupprivate, source, context_);
+}
+
+void OmpStructureChecker::Enter(const parser::OmpClause::Grainsize &x) {
+  CheckAllowedClause(llvm::omp::Clause::OMPC_grainsize);
+  parser::CharBlock source{GetContext().clauseSource};
+
+  if (OmpVerifyModifiers(x.v, llvm::omp::OMPC_grainsize, source, context_)) {
+auto &modifiers{OmpGetModifiers(x.v)};
+for (auto *mod :
+OmpGetRepeatableModifier(modifiers)) {
+  if (mod->v != parser::OmpPrescriptiveness::Value::Strict) {
+context_.Say(OmpGetModifierSource(modifiers, mod),
+"Only STRICT is allowed as prescriptiveness on this 
clause"_err_en_US);
+  }
+}
+  }
+}
+
+void OmpStructureChecker::Enter(const parser::OmpClause::NumTasks &x) {
+  CheckAllowedClause(llvm::omp::Clause::OMPC_num_tasks);
+  parser::CharBlock source{GetContext().clauseSource};
+
+  if (OmpVerifyModifiers(x.v, llvm::omp::OMPC_num_tasks, source, context_)) {
+auto &modifiers{OmpGetModifiers(x.v)};
+for (auto *mod :
+OmpGetRepeatableModifier(modifiers)) {
+  if (mod->v != parser::OmpPrescriptiveness::Value::Strict) {
+context_.Say(OmpGetModifierSource(modifiers, mod),
+"Only STRICT is allowed as prescriptiveness on this 
clause"_err_en_US);
+  }
+}
+  }
+}
+
 void OmpStructureChecker::Enter(const parser::OmpDirectiveSpecification &x) {
   // OmpDirectiveSpecification exists on its own only in METADIRECTIVE.
   // In other cases it's a part of other constructs that handle directive
@@ -2542,6 +2581,32 @@ void OmpStructureChecker::Leave(const 
parser::OmpClauseList &) {
 }
   }
 
+  // Default access-group for DYN_GROUPPRIVATE is "cgroup". On a given
+  // construct there can be at most one DYN_GROUPPRIVATE with a given
+  // access-group.
+  const parser::OmpClause
+  *accGrpClause[parser::OmpAccessGroup::Value_enumSize] = {nullptr};
+  for (auto [_, clause] :
+  FindClauses(llvm::omp::Clause::OMPC_dyn_groupprivate)) {
+auto &wrapper{std::get(clause->u)};
+auto &modifiers{OmpGetModifiers(wrapper.v)};
+auto accGrp{parser::OmpAccessGroup::Value::Cgroup};
+if (auto *ag{OmpGetUniqueModifier(modifiers)}) {
+  accGrp = ag->v;
+}
+auto &firstClause{accGrpClause[llvm::to_underlying(accGrp)]};
+if (firstClause) {
+  context_
+  .Say(clause->source,
+  "The access-group modifier can only occur on a single clause in 
a construct"_err_en_US)
+  .Attach(firstClause->source,
+  "Previous clause with access-group modifier"_en_US);
+  break;
+} else {
+  firstClause = clause;
+}
+  }
+
   CheckRequireAtLeastOneOf();
 }
 
@@ -2593,18 +2658,15 @@ CHECK_SIMPLE_CLAUSE(Default, OMPC_default)
 CHECK_SIMPLE_CLAUSE(Depobj

[llvm-branch-commits] [flang] [flang][OpenMP] Semantic checks for DYN_GROUPPRIVATE (PR #154550)

2025-08-20 Thread Krzysztof Parzyszek via llvm-branch-commits

kparzysz wrote:

PR stack:
- https://github.com/llvm/llvm-project/pull/154549
- https://github.com/llvm/llvm-project/pull/154550 (this PR)

https://github.com/llvm/llvm-project/pull/154550
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [X86] SimplifyDemandedVectorEltsForTargetNode - don't split X86ISD::CVTTP2UI nodes without AVX512VL (#154504) (PR #154526)

2025-08-20 Thread Phoebe Wang via llvm-branch-commits

https://github.com/phoebewang approved this pull request.

LGTM.

https://github.com/llvm/llvm-project/pull/154526
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Semantic checks for DYN_GROUPPRIVATE (PR #154550)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Krzysztof Parzyszek (kparzysz)


Changes

Add checks for non-STRICT values of the prescriptiveness modifier on clauses 
that had accepted it prior to the addition of FALLBACK value (GRAINSIZE and 
NUM_TASKS).

---
Full diff: https://github.com/llvm/llvm-project/pull/154550.diff


4 Files Affected:

- (modified) flang/include/flang/Semantics/openmp-modifiers.h (+1) 
- (modified) flang/lib/Semantics/check-omp-structure.cpp (+65-3) 
- (modified) flang/lib/Semantics/openmp-modifiers.cpp (+19) 
- (added) flang/test/Semantics/OpenMP/prescriptiveness-modifier.f90 (+47) 


``diff
diff --git a/flang/include/flang/Semantics/openmp-modifiers.h 
b/flang/include/flang/Semantics/openmp-modifiers.h
index e0eae984731c7..f74e58a94f768 100644
--- a/flang/include/flang/Semantics/openmp-modifiers.h
+++ b/flang/include/flang/Semantics/openmp-modifiers.h
@@ -67,6 +67,7 @@ template  const OmpModifierDescriptor 
&OmpGetDescriptor();
 #define DECLARE_DESCRIPTOR(name) \
   template <> const OmpModifierDescriptor &OmpGetDescriptor()
 
+DECLARE_DESCRIPTOR(parser::OmpAccessGroup);
 DECLARE_DESCRIPTOR(parser::OmpAlignment);
 DECLARE_DESCRIPTOR(parser::OmpAlignModifier);
 DECLARE_DESCRIPTOR(parser::OmpAllocatorComplexModifier);
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index 2b36b085ae08d..e83a3616d6965 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -471,6 +471,45 @@ void OmpStructureChecker::Enter(const 
parser::OmpClause::Hint &x) {
   }
 }
 
+void OmpStructureChecker::Enter(const parser::OmpClause::DynGroupprivate &x) {
+  CheckAllowedClause(llvm::omp::Clause::OMPC_dyn_groupprivate);
+  parser::CharBlock source{GetContext().clauseSource};
+
+  OmpVerifyModifiers(x.v, llvm::omp::OMPC_dyn_groupprivate, source, context_);
+}
+
+void OmpStructureChecker::Enter(const parser::OmpClause::Grainsize &x) {
+  CheckAllowedClause(llvm::omp::Clause::OMPC_grainsize);
+  parser::CharBlock source{GetContext().clauseSource};
+
+  if (OmpVerifyModifiers(x.v, llvm::omp::OMPC_grainsize, source, context_)) {
+auto &modifiers{OmpGetModifiers(x.v)};
+for (auto *mod :
+OmpGetRepeatableModifier(modifiers)) {
+  if (mod->v != parser::OmpPrescriptiveness::Value::Strict) {
+context_.Say(OmpGetModifierSource(modifiers, mod),
+"Only STRICT is allowed as prescriptiveness on this 
clause"_err_en_US);
+  }
+}
+  }
+}
+
+void OmpStructureChecker::Enter(const parser::OmpClause::NumTasks &x) {
+  CheckAllowedClause(llvm::omp::Clause::OMPC_num_tasks);
+  parser::CharBlock source{GetContext().clauseSource};
+
+  if (OmpVerifyModifiers(x.v, llvm::omp::OMPC_num_tasks, source, context_)) {
+auto &modifiers{OmpGetModifiers(x.v)};
+for (auto *mod :
+OmpGetRepeatableModifier(modifiers)) {
+  if (mod->v != parser::OmpPrescriptiveness::Value::Strict) {
+context_.Say(OmpGetModifierSource(modifiers, mod),
+"Only STRICT is allowed as prescriptiveness on this 
clause"_err_en_US);
+  }
+}
+  }
+}
+
 void OmpStructureChecker::Enter(const parser::OmpDirectiveSpecification &x) {
   // OmpDirectiveSpecification exists on its own only in METADIRECTIVE.
   // In other cases it's a part of other constructs that handle directive
@@ -2542,6 +2581,32 @@ void OmpStructureChecker::Leave(const 
parser::OmpClauseList &) {
 }
   }
 
+  // Default access-group for DYN_GROUPPRIVATE is "cgroup". On a given
+  // construct there can be at most one DYN_GROUPPRIVATE with a given
+  // access-group.
+  const parser::OmpClause
+  *accGrpClause[parser::OmpAccessGroup::Value_enumSize] = {nullptr};
+  for (auto [_, clause] :
+  FindClauses(llvm::omp::Clause::OMPC_dyn_groupprivate)) {
+auto &wrapper{std::get(clause->u)};
+auto &modifiers{OmpGetModifiers(wrapper.v)};
+auto accGrp{parser::OmpAccessGroup::Value::Cgroup};
+if (auto *ag{OmpGetUniqueModifier(modifiers)}) {
+  accGrp = ag->v;
+}
+auto &firstClause{accGrpClause[llvm::to_underlying(accGrp)]};
+if (firstClause) {
+  context_
+  .Say(clause->source,
+  "The access-group modifier can only occur on a single clause in 
a construct"_err_en_US)
+  .Attach(firstClause->source,
+  "Previous clause with access-group modifier"_en_US);
+  break;
+} else {
+  firstClause = clause;
+}
+  }
+
   CheckRequireAtLeastOneOf();
 }
 
@@ -2593,18 +2658,15 @@ CHECK_SIMPLE_CLAUSE(Default, OMPC_default)
 CHECK_SIMPLE_CLAUSE(Depobj, OMPC_depobj)
 CHECK_SIMPLE_CLAUSE(DeviceType, OMPC_device_type)
 CHECK_SIMPLE_CLAUSE(DistSchedule, OMPC_dist_schedule)
-CHECK_SIMPLE_CLAUSE(DynGroupprivate, OMPC_dyn_groupprivate)
 CHECK_SIMPLE_CLAUSE(Exclusive, OMPC_exclusive)
 CHECK_SIMPLE_CLAUSE(Final, OMPC_final)
 CHECK_SIMPLE_CLAUSE(Flush, OMPC_flush)
 CHECK_SIMPLE_CLAUSE(Full, OMPC_full)
-CHECK_SIMPLE_CLA

[llvm-branch-commits] [mlir] release/21.x: [mlir] Make parser not rely on terminating null. (#151007) (PR #154551)

2025-08-20 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/154551
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] release/21.x: [mlir] Make parser not rely on terminating null. (#151007) (PR #154551)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: None (llvmbot)


Changes

Backport 217f9e57d1cc46de51d3b36177c4ba4049aaa805

Requested by: @EugeneZelenko

---
Full diff: https://github.com/llvm/llvm-project/pull/154551.diff


3 Files Affected:

- (modified) mlir/lib/AsmParser/DialectSymbolParser.cpp (+7) 
- (modified) mlir/lib/AsmParser/Lexer.cpp (+24-3) 
- (modified) mlir/lib/AsmParser/Lexer.h (+3) 


``diff
diff --git a/mlir/lib/AsmParser/DialectSymbolParser.cpp 
b/mlir/lib/AsmParser/DialectSymbolParser.cpp
index 9f4a87a6a02de..8b14e71118c3a 100644
--- a/mlir/lib/AsmParser/DialectSymbolParser.cpp
+++ b/mlir/lib/AsmParser/DialectSymbolParser.cpp
@@ -89,6 +89,7 @@ ParseResult Parser::parseDialectSymbolBody(StringRef &body,
 nestedPunctuation.pop_back();
 return success();
   };
+  const char *curBufferEnd = state.lex.getBufferEnd();
   do {
 // Handle code completions, which may appear in the middle of the symbol
 // body.
@@ -98,6 +99,12 @@ ParseResult Parser::parseDialectSymbolBody(StringRef &body,
   break;
 }
 
+if (curBufferEnd == curPtr) {
+  if (!nestedPunctuation.empty())
+return emitPunctError();
+  return emitError("unexpected nul or EOF in pretty dialect name");
+}
+
 char c = *curPtr++;
 switch (c) {
 case '\0':
diff --git a/mlir/lib/AsmParser/Lexer.cpp b/mlir/lib/AsmParser/Lexer.cpp
index 751bd63e537f8..8f53529823e23 100644
--- a/mlir/lib/AsmParser/Lexer.cpp
+++ b/mlir/lib/AsmParser/Lexer.cpp
@@ -37,6 +37,18 @@ Lexer::Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext 
*context,
  AsmParserCodeCompleteContext *codeCompleteContext)
 : sourceMgr(sourceMgr), context(context), codeCompleteLoc(nullptr) {
   auto bufferID = sourceMgr.getMainFileID();
+
+  // Check to see if the main buffer contains the last buffer, and if so the
+  // last buffer should be used as main file for parsing.
+  if (sourceMgr.getNumBuffers() > 1) {
+unsigned lastFileID = sourceMgr.getNumBuffers();
+const llvm::MemoryBuffer *main = sourceMgr.getMemoryBuffer(bufferID);
+const llvm::MemoryBuffer *last = sourceMgr.getMemoryBuffer(lastFileID);
+if (main->getBufferStart() <= last->getBufferStart() &&
+main->getBufferEnd() >= last->getBufferEnd()) {
+  bufferID = lastFileID;
+}
+  }
   curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
   curPtr = curBuffer.begin();
 
@@ -71,6 +83,7 @@ Token Lexer::emitError(const char *loc, const Twine &message) 
{
 }
 
 Token Lexer::lexToken() {
+  const char *curBufferEnd = curBuffer.end();
   while (true) {
 const char *tokStart = curPtr;
 
@@ -78,6 +91,9 @@ Token Lexer::lexToken() {
 if (tokStart == codeCompleteLoc)
   return formToken(Token::code_complete, tokStart);
 
+if (tokStart == curBufferEnd)
+  return formToken(Token::eof, tokStart);
+
 // Lex the next token.
 switch (*curPtr++) {
 default:
@@ -102,7 +118,7 @@ Token Lexer::lexToken() {
 case 0:
   // This may either be a nul character in the source file or may be the 
EOF
   // marker that llvm::MemoryBuffer guarantees will be there.
-  if (curPtr - 1 == curBuffer.end())
+  if (curPtr - 1 == curBufferEnd)
 return formToken(Token::eof, tokStart);
   continue;
 
@@ -259,7 +275,11 @@ void Lexer::skipComment() {
   assert(*curPtr == '/');
   ++curPtr;
 
+  const char *curBufferEnd = curBuffer.end();
   while (true) {
+if (curPtr == curBufferEnd)
+  return;
+
 switch (*curPtr++) {
 case '\n':
 case '\r':
@@ -267,7 +287,7 @@ void Lexer::skipComment() {
   return;
 case 0:
   // If this is the end of the buffer, end the comment.
-  if (curPtr - 1 == curBuffer.end()) {
+  if (curPtr - 1 == curBufferEnd) {
 --curPtr;
 return;
   }
@@ -405,6 +425,7 @@ Token Lexer::lexPrefixedIdentifier(const char *tokStart) {
 Token Lexer::lexString(const char *tokStart) {
   assert(curPtr[-1] == '"');
 
+  const char *curBufferEnd = curBuffer.end();
   while (true) {
 // Check to see if there is a code completion location within the string. 
In
 // these cases we generate a completion location and place the currently
@@ -419,7 +440,7 @@ Token Lexer::lexString(const char *tokStart) {
 case 0:
   // If this is a random nul character in the middle of a string, just
   // include it.  If it is the end of file, then it is an error.
-  if (curPtr - 1 != curBuffer.end())
+  if (curPtr - 1 != curBufferEnd)
 continue;
   [[fallthrough]];
 case '\n':
diff --git a/mlir/lib/AsmParser/Lexer.h b/mlir/lib/AsmParser/Lexer.h
index 4085a9b73854b..670444eb1f5b4 100644
--- a/mlir/lib/AsmParser/Lexer.h
+++ b/mlir/lib/AsmParser/Lexer.h
@@ -40,6 +40,9 @@ class Lexer {
   /// Returns the start of the buffer.
   const char *getBufferBegin() { return curBuffer.data(); }
 
+  /// Returns the end of the buffer.
+  const char *getBufferEnd() { return curBuffer.end(); }
+
   /// Return the

[llvm-branch-commits] [mlir] release/21.x: [mlir] Make parser not rely on terminating null. (#151007) (PR #154551)

2025-08-20 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/154551

Backport 217f9e57d1cc46de51d3b36177c4ba4049aaa805

Requested by: @EugeneZelenko

>From 0c0c528430f7b966caeedb5754fcc591258ebdb2 Mon Sep 17 00:00:00 2001
From: Jacques Pienaar 
Date: Tue, 29 Jul 2025 04:59:46 +0200
Subject: [PATCH] [mlir] Make parser not rely on terminating null. (#151007)

Used in follow up to parse slices of buffer.

(cherry picked from commit 217f9e57d1cc46de51d3b36177c4ba4049aaa805)
---
 mlir/lib/AsmParser/DialectSymbolParser.cpp |  7 ++
 mlir/lib/AsmParser/Lexer.cpp   | 27 +++---
 mlir/lib/AsmParser/Lexer.h |  3 +++
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/AsmParser/DialectSymbolParser.cpp 
b/mlir/lib/AsmParser/DialectSymbolParser.cpp
index 9f4a87a6a02de..8b14e71118c3a 100644
--- a/mlir/lib/AsmParser/DialectSymbolParser.cpp
+++ b/mlir/lib/AsmParser/DialectSymbolParser.cpp
@@ -89,6 +89,7 @@ ParseResult Parser::parseDialectSymbolBody(StringRef &body,
 nestedPunctuation.pop_back();
 return success();
   };
+  const char *curBufferEnd = state.lex.getBufferEnd();
   do {
 // Handle code completions, which may appear in the middle of the symbol
 // body.
@@ -98,6 +99,12 @@ ParseResult Parser::parseDialectSymbolBody(StringRef &body,
   break;
 }
 
+if (curBufferEnd == curPtr) {
+  if (!nestedPunctuation.empty())
+return emitPunctError();
+  return emitError("unexpected nul or EOF in pretty dialect name");
+}
+
 char c = *curPtr++;
 switch (c) {
 case '\0':
diff --git a/mlir/lib/AsmParser/Lexer.cpp b/mlir/lib/AsmParser/Lexer.cpp
index 751bd63e537f8..8f53529823e23 100644
--- a/mlir/lib/AsmParser/Lexer.cpp
+++ b/mlir/lib/AsmParser/Lexer.cpp
@@ -37,6 +37,18 @@ Lexer::Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext 
*context,
  AsmParserCodeCompleteContext *codeCompleteContext)
 : sourceMgr(sourceMgr), context(context), codeCompleteLoc(nullptr) {
   auto bufferID = sourceMgr.getMainFileID();
+
+  // Check to see if the main buffer contains the last buffer, and if so the
+  // last buffer should be used as main file for parsing.
+  if (sourceMgr.getNumBuffers() > 1) {
+unsigned lastFileID = sourceMgr.getNumBuffers();
+const llvm::MemoryBuffer *main = sourceMgr.getMemoryBuffer(bufferID);
+const llvm::MemoryBuffer *last = sourceMgr.getMemoryBuffer(lastFileID);
+if (main->getBufferStart() <= last->getBufferStart() &&
+main->getBufferEnd() >= last->getBufferEnd()) {
+  bufferID = lastFileID;
+}
+  }
   curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
   curPtr = curBuffer.begin();
 
@@ -71,6 +83,7 @@ Token Lexer::emitError(const char *loc, const Twine &message) 
{
 }
 
 Token Lexer::lexToken() {
+  const char *curBufferEnd = curBuffer.end();
   while (true) {
 const char *tokStart = curPtr;
 
@@ -78,6 +91,9 @@ Token Lexer::lexToken() {
 if (tokStart == codeCompleteLoc)
   return formToken(Token::code_complete, tokStart);
 
+if (tokStart == curBufferEnd)
+  return formToken(Token::eof, tokStart);
+
 // Lex the next token.
 switch (*curPtr++) {
 default:
@@ -102,7 +118,7 @@ Token Lexer::lexToken() {
 case 0:
   // This may either be a nul character in the source file or may be the 
EOF
   // marker that llvm::MemoryBuffer guarantees will be there.
-  if (curPtr - 1 == curBuffer.end())
+  if (curPtr - 1 == curBufferEnd)
 return formToken(Token::eof, tokStart);
   continue;
 
@@ -259,7 +275,11 @@ void Lexer::skipComment() {
   assert(*curPtr == '/');
   ++curPtr;
 
+  const char *curBufferEnd = curBuffer.end();
   while (true) {
+if (curPtr == curBufferEnd)
+  return;
+
 switch (*curPtr++) {
 case '\n':
 case '\r':
@@ -267,7 +287,7 @@ void Lexer::skipComment() {
   return;
 case 0:
   // If this is the end of the buffer, end the comment.
-  if (curPtr - 1 == curBuffer.end()) {
+  if (curPtr - 1 == curBufferEnd) {
 --curPtr;
 return;
   }
@@ -405,6 +425,7 @@ Token Lexer::lexPrefixedIdentifier(const char *tokStart) {
 Token Lexer::lexString(const char *tokStart) {
   assert(curPtr[-1] == '"');
 
+  const char *curBufferEnd = curBuffer.end();
   while (true) {
 // Check to see if there is a code completion location within the string. 
In
 // these cases we generate a completion location and place the currently
@@ -419,7 +440,7 @@ Token Lexer::lexString(const char *tokStart) {
 case 0:
   // If this is a random nul character in the middle of a string, just
   // include it.  If it is the end of file, then it is an error.
-  if (curPtr - 1 != curBuffer.end())
+  if (curPtr - 1 != curBufferEnd)
 continue;
   [[fallthrough]];
 case '\n':
diff --git a/mlir/lib/AsmParser/Lexer.h b/mlir/lib/AsmParser/Lexer.h
index 4085a9b73854b..670444eb1f5b4 100644
--- a/mlir/lib

[llvm-branch-commits] [mlir] release/21.x: [mlir] Make parser not rely on terminating null. (#151007) (PR #154551)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:

@joker-eph What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/154551
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy commented:

Thanks Chaitanya. Just a few small comments.

https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add Lowering to omp mlir for workdistribute construct (PR #154378)

2025-08-20 Thread Kareem Ergawy via llvm-branch-commits


@@ -0,0 +1,30 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s

ergawy wrote:

Add a standalone `workdistribute` test as well?

https://github.com/llvm/llvm-project/pull/154378
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add Lowering to omp mlir for workdistribute construct (PR #154378)

2025-08-20 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy approved this pull request.

LGTM! Just a small comment.

https://github.com/llvm/llvm-project/pull/154378
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add Lowering to omp mlir for workdistribute construct (PR #154378)

2025-08-20 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy edited 
https://github.com/llvm/llvm-project/pull/154378
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread via llvm-branch-commits

https://github.com/skc7 ready_for_review 
https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Chaitanya (skc7)


Changes

This PR adds parser and semantic support for the workdistribute construct in Flang.

This is part 2 of workdistribute frontend support.

Part1 : #154376 [OpenMP] Add workdistribute construct in openMP dialect 
and in llvm frontend
Part2 : #154377 [flang][openmp] Add parser/semantic support for 
workdistribute
Part3 : #154378 [flang][openmp] Add Lowering to omp mlir for 
workdistribute construct

---
Full diff: https://github.com/llvm/llvm-project/pull/154377.diff


9 Files Affected:

- (modified) flang/include/flang/Semantics/openmp-directive-sets.h (+7) 
- (modified) flang/lib/Parser/openmp-parsers.cpp (+5-1) 
- (modified) flang/lib/Semantics/check-omp-structure.cpp (+95) 
- (modified) flang/lib/Semantics/check-omp-structure.h (+1) 
- (modified) flang/lib/Semantics/resolve-directives.cpp (+7-1) 
- (added) flang/test/Parser/OpenMP/workdistribute.f90 (+27) 
- (added) flang/test/Semantics/OpenMP/workdistribute01.f90 (+16) 
- (added) flang/test/Semantics/OpenMP/workdistribute02.f90 (+34) 
- (added) flang/test/Semantics/OpenMP/workdistribute03.f90 (+34) 


``diff
diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h 
b/flang/include/flang/Semantics/openmp-directive-sets.h
index cc66cc833e8b7..01e8481e05721 100644
--- a/flang/include/flang/Semantics/openmp-directive-sets.h
+++ b/flang/include/flang/Semantics/openmp-directive-sets.h
@@ -143,6 +143,7 @@ static const OmpDirectiveSet topTargetSet{
 Directive::OMPD_target_teams_distribute_parallel_do_simd,
 Directive::OMPD_target_teams_distribute_simd,
 Directive::OMPD_target_teams_loop,
+Directive::OMPD_target_teams_workdistribute,
 };
 
 static const OmpDirectiveSet allTargetSet{topTargetSet};
@@ -172,6 +173,7 @@ static const OmpDirectiveSet topTeamsSet{
 Directive::OMPD_teams_distribute_parallel_do_simd,
 Directive::OMPD_teams_distribute_simd,
 Directive::OMPD_teams_loop,
+Directive::OMPD_teams_workdistribute,
 };
 
 static const OmpDirectiveSet bottomTeamsSet{
@@ -187,6 +189,7 @@ static const OmpDirectiveSet allTeamsSet{
 Directive::OMPD_target_teams_distribute_parallel_do_simd,
 Directive::OMPD_target_teams_distribute_simd,
 Directive::OMPD_target_teams_loop,
+Directive::OMPD_target_teams_workdistribute,
 } | topTeamsSet,
 };
 
@@ -230,6 +233,9 @@ static const OmpDirectiveSet blockConstructSet{
 Directive::OMPD_taskgroup,
 Directive::OMPD_teams,
 Directive::OMPD_workshare,
+Directive::OMPD_target_teams_workdistribute,
+Directive::OMPD_teams_workdistribute,
+Directive::OMPD_workdistribute,
 };
 
 static const OmpDirectiveSet loopConstructSet{
@@ -376,6 +382,7 @@ static const OmpDirectiveSet 
nestedReduceWorkshareAllowedSet{
 };
 
 static const OmpDirectiveSet nestedTeamsAllowedSet{
+Directive::OMPD_workdistribute,
 Directive::OMPD_distribute,
 Directive::OMPD_distribute_parallel_do,
 Directive::OMPD_distribute_parallel_do_simd,
diff --git a/flang/lib/Parser/openmp-parsers.cpp 
b/flang/lib/Parser/openmp-parsers.cpp
index 56cee4ab38e9b..51b49a591b02f 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -1870,11 +1870,15 @@ TYPE_PARSER( //
 MakeBlockConstruct(llvm::omp::Directive::OMPD_target_data) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_target_parallel) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_target_teams) ||
+MakeBlockConstruct(
+llvm::omp::Directive::OMPD_target_teams_workdistribute) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_target) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_task) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_taskgroup) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_teams) ||
-MakeBlockConstruct(llvm::omp::Directive::OMPD_workshare))
+MakeBlockConstruct(llvm::omp::Directive::OMPD_teams_workdistribute) ||
+MakeBlockConstruct(llvm::omp::Directive::OMPD_workshare) ||
+MakeBlockConstruct(llvm::omp::Directive::OMPD_workdistribute))
 #undef MakeBlockConstruct
 
 // OMP SECTIONS Directive
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index 2b36b085ae08d..4c4e17c39c03a 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -141,6 +141,67 @@ class OmpWorkshareBlockChecker {
   parser::CharBlock source_;
 };
 
+// 'OmpWorkdistributeBlockChecker' is used to check the validity of the
+// assignment statements and the expressions enclosed in an OpenMP
+// workdistribute construct
+class OmpWorkdistributeBlockChecker {
+public:
+  OmpWorkdistributeBlockChecker(
+  SemanticsContext &context, parser::CharBlock source)
+  : context_{context}, source_{source} {}
+
+  template  bool Pre(const T &) { return true; }
+  template  void Post(const T &) {}
+
+  bool Pre(const parser::AssignmentStmt &assignment) {
+  

[llvm-branch-commits] [flang] [flang][openmp] Add Lowering to omp mlir for workdistribute construct (PR #154378)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Chaitanya (skc7)


Changes

This PR adds lowering of the workdistribute construct in Flang to the OpenMP MLIR dialect
workdistribute op.

This is part 3 of workdistribute frontend support.

Part1 : #154376 [OpenMP] Add workdistribute construct in openMP dialect 
and in llvm frontend
Part2 : #154377 [flang][openmp] Add parser/semantic support for 
workdistribute
Part3 : #154378 [flang][openmp] Add Lowering to omp mlir for 
workdistribute construct

---
Full diff: https://github.com/llvm/llvm-project/pull/154378.diff


2 Files Affected:

- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+22-1) 
- (added) flang/test/Lower/OpenMP/workdistribute.f90 (+30) 


``diff
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index ec2ec37e623f8..1044b7ad31202 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -534,6 +534,13 @@ static void 
processHostEvalClauses(lower::AbstractConverter &converter,
   cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv);
   break;
 
+case OMPD_teams_workdistribute:
+  cp.processThreadLimit(stmtCtx, hostInfo->ops);
+  [[fallthrough]];
+case OMPD_target_teams_workdistribute:
+  cp.processNumTeams(stmtCtx, hostInfo->ops);
+  break;
+
 // Standalone 'target' case.
 case OMPD_target: {
   processSingleNestedIf(
@@ -2820,6 +2827,17 @@ genTeamsOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   queue, item, clauseOps);
 }
 
+static mlir::omp::WorkdistributeOp genWorkdistributeOp(
+lower::AbstractConverter &converter, lower::SymMap &symTable,
+semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+mlir::Location loc, const ConstructQueue &queue,
+ConstructQueue::const_iterator item) {
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workdistribute),
+  queue, item);
+}
+
 
//===--===//
 // Code generation functions for the standalone version of constructs that can
 // also be a leaf of a composite construct
@@ -3459,7 +3477,10 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   case llvm::omp::Directive::OMPD_unroll:
 genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
 break;
-  // case llvm::omp::Directive::OMPD_workdistribute:
+  case llvm::omp::Directive::OMPD_workdistribute:
+newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue,
+item);
+break;
   case llvm::omp::Directive::OMPD_workshare:
 newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
diff --git a/flang/test/Lower/OpenMP/workdistribute.f90 
b/flang/test/Lower/OpenMP/workdistribute.f90
new file mode 100644
index 0..dc66cd73e692b
--- /dev/null
+++ b/flang/test/Lower/OpenMP/workdistribute.f90
@@ -0,0 +1,30 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPtarget_teams_workdistribute
+subroutine target_teams_workdistribute()
+  integer :: aa(10), bb(10)
+  ! CHECK: omp.target
+  ! CHECK: omp.teams
+  ! CHECK: omp.workdistribute
+  !$omp target teams workdistribute
+  aa = bb
+  ! CHECK: omp.terminator
+  ! CHECK: omp.terminator
+  ! CHECK: omp.terminator
+  !$omp end target teams workdistribute
+end subroutine target_teams_workdistribute
+
+! CHECK-LABEL: func @_QPteams_workdistribute
+subroutine teams_workdistribute()
+  use iso_fortran_env
+  real(kind=real32) :: a
+  real(kind=real32), dimension(10) :: x
+  real(kind=real32), dimension(10) :: y
+  ! CHECK: omp.teams
+  ! CHECK: omp.workdistribute
+  !$omp teams workdistribute
+  y = a * x + y
+  ! CHECK: omp.terminator
+  ! CHECK: omp.terminator
+  !$omp end teams workdistribute
+end subroutine teams_workdistribute

``




https://github.com/llvm/llvm-project/pull/154378
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add Lowering to omp mlir for workdistribute construct (PR #154378)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-fir-hlfir

Author: Chaitanya (skc7)


Changes

This PR adds lowering of the workdistribute construct in Flang to the OpenMP MLIR dialect
workdistribute op.

This is part 3 of workdistribute frontend support.

Part1 : #154376 [OpenMP] Add workdistribute construct in openMP dialect 
and in llvm frontend
Part2 : #154377 [flang][openmp] Add parser/semantic support for 
workdistribute
Part3 : #154378 [flang][openmp] Add Lowering to omp mlir for 
workdistribute construct

---
Full diff: https://github.com/llvm/llvm-project/pull/154378.diff


2 Files Affected:

- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+22-1) 
- (added) flang/test/Lower/OpenMP/workdistribute.f90 (+30) 


``diff
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index ec2ec37e623f8..1044b7ad31202 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -534,6 +534,13 @@ static void 
processHostEvalClauses(lower::AbstractConverter &converter,
   cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv);
   break;
 
+case OMPD_teams_workdistribute:
+  cp.processThreadLimit(stmtCtx, hostInfo->ops);
+  [[fallthrough]];
+case OMPD_target_teams_workdistribute:
+  cp.processNumTeams(stmtCtx, hostInfo->ops);
+  break;
+
 // Standalone 'target' case.
 case OMPD_target: {
   processSingleNestedIf(
@@ -2820,6 +2827,17 @@ genTeamsOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   queue, item, clauseOps);
 }
 
+static mlir::omp::WorkdistributeOp genWorkdistributeOp(
+lower::AbstractConverter &converter, lower::SymMap &symTable,
+semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+mlir::Location loc, const ConstructQueue &queue,
+ConstructQueue::const_iterator item) {
+  return genOpWithBody(
+  OpWithBodyGenInfo(converter, symTable, semaCtx, loc, eval,
+llvm::omp::Directive::OMPD_workdistribute),
+  queue, item);
+}
+
 
//===--===//
 // Code generation functions for the standalone version of constructs that can
 // also be a leaf of a composite construct
@@ -3459,7 +3477,10 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
   case llvm::omp::Directive::OMPD_unroll:
 genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
 break;
-  // case llvm::omp::Directive::OMPD_workdistribute:
+  case llvm::omp::Directive::OMPD_workdistribute:
+newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue,
+item);
+break;
   case llvm::omp::Directive::OMPD_workshare:
 newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
queue, item);
diff --git a/flang/test/Lower/OpenMP/workdistribute.f90 
b/flang/test/Lower/OpenMP/workdistribute.f90
new file mode 100644
index 0..dc66cd73e692b
--- /dev/null
+++ b/flang/test/Lower/OpenMP/workdistribute.f90
@@ -0,0 +1,30 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPtarget_teams_workdistribute
+subroutine target_teams_workdistribute()
+  integer :: aa(10), bb(10)
+  ! CHECK: omp.target
+  ! CHECK: omp.teams
+  ! CHECK: omp.workdistribute
+  !$omp target teams workdistribute
+  aa = bb
+  ! CHECK: omp.terminator
+  ! CHECK: omp.terminator
+  ! CHECK: omp.terminator
+  !$omp end target teams workdistribute
+end subroutine target_teams_workdistribute
+
+! CHECK-LABEL: func @_QPteams_workdistribute
+subroutine teams_workdistribute()
+  use iso_fortran_env
+  real(kind=real32) :: a
+  real(kind=real32), dimension(10) :: x
+  real(kind=real32), dimension(10) :: y
+  ! CHECK: omp.teams
+  ! CHECK: omp.workdistribute
+  !$omp teams workdistribute
+  y = a * x + y
+  ! CHECK: omp.terminator
+  ! CHECK: omp.terminator
+  !$omp end teams workdistribute
+end subroutine teams_workdistribute

``




https://github.com/llvm/llvm-project/pull/154378
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-semantics

Author: Chaitanya (skc7)


Changes

This PR adds parser and semantic support for the workdistribute construct in Flang.

This is part 2 of workdistribute frontend support.

Part1 : #154376 [OpenMP] Add workdistribute construct in openMP dialect 
and in llvm frontend
Part2 : #154377 [flang][openmp] Add parser/semantic support for 
workdistribute
Part3 : #154378 [flang][openmp] Add Lowering to omp mlir for 
workdistribute construct

---
Full diff: https://github.com/llvm/llvm-project/pull/154377.diff


9 Files Affected:

- (modified) flang/include/flang/Semantics/openmp-directive-sets.h (+7) 
- (modified) flang/lib/Parser/openmp-parsers.cpp (+5-1) 
- (modified) flang/lib/Semantics/check-omp-structure.cpp (+95) 
- (modified) flang/lib/Semantics/check-omp-structure.h (+1) 
- (modified) flang/lib/Semantics/resolve-directives.cpp (+7-1) 
- (added) flang/test/Parser/OpenMP/workdistribute.f90 (+27) 
- (added) flang/test/Semantics/OpenMP/workdistribute01.f90 (+16) 
- (added) flang/test/Semantics/OpenMP/workdistribute02.f90 (+34) 
- (added) flang/test/Semantics/OpenMP/workdistribute03.f90 (+34) 


``diff
diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h 
b/flang/include/flang/Semantics/openmp-directive-sets.h
index cc66cc833e8b7..01e8481e05721 100644
--- a/flang/include/flang/Semantics/openmp-directive-sets.h
+++ b/flang/include/flang/Semantics/openmp-directive-sets.h
@@ -143,6 +143,7 @@ static const OmpDirectiveSet topTargetSet{
 Directive::OMPD_target_teams_distribute_parallel_do_simd,
 Directive::OMPD_target_teams_distribute_simd,
 Directive::OMPD_target_teams_loop,
+Directive::OMPD_target_teams_workdistribute,
 };
 
 static const OmpDirectiveSet allTargetSet{topTargetSet};
@@ -172,6 +173,7 @@ static const OmpDirectiveSet topTeamsSet{
 Directive::OMPD_teams_distribute_parallel_do_simd,
 Directive::OMPD_teams_distribute_simd,
 Directive::OMPD_teams_loop,
+Directive::OMPD_teams_workdistribute,
 };
 
 static const OmpDirectiveSet bottomTeamsSet{
@@ -187,6 +189,7 @@ static const OmpDirectiveSet allTeamsSet{
 Directive::OMPD_target_teams_distribute_parallel_do_simd,
 Directive::OMPD_target_teams_distribute_simd,
 Directive::OMPD_target_teams_loop,
+Directive::OMPD_target_teams_workdistribute,
 } | topTeamsSet,
 };
 
@@ -230,6 +233,9 @@ static const OmpDirectiveSet blockConstructSet{
 Directive::OMPD_taskgroup,
 Directive::OMPD_teams,
 Directive::OMPD_workshare,
+Directive::OMPD_target_teams_workdistribute,
+Directive::OMPD_teams_workdistribute,
+Directive::OMPD_workdistribute,
 };
 
 static const OmpDirectiveSet loopConstructSet{
@@ -376,6 +382,7 @@ static const OmpDirectiveSet 
nestedReduceWorkshareAllowedSet{
 };
 
 static const OmpDirectiveSet nestedTeamsAllowedSet{
+Directive::OMPD_workdistribute,
 Directive::OMPD_distribute,
 Directive::OMPD_distribute_parallel_do,
 Directive::OMPD_distribute_parallel_do_simd,
diff --git a/flang/lib/Parser/openmp-parsers.cpp 
b/flang/lib/Parser/openmp-parsers.cpp
index 56cee4ab38e9b..51b49a591b02f 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -1870,11 +1870,15 @@ TYPE_PARSER( //
 MakeBlockConstruct(llvm::omp::Directive::OMPD_target_data) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_target_parallel) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_target_teams) ||
+MakeBlockConstruct(
+llvm::omp::Directive::OMPD_target_teams_workdistribute) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_target) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_task) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_taskgroup) ||
 MakeBlockConstruct(llvm::omp::Directive::OMPD_teams) ||
-MakeBlockConstruct(llvm::omp::Directive::OMPD_workshare))
+MakeBlockConstruct(llvm::omp::Directive::OMPD_teams_workdistribute) ||
+MakeBlockConstruct(llvm::omp::Directive::OMPD_workshare) ||
+MakeBlockConstruct(llvm::omp::Directive::OMPD_workdistribute))
 #undef MakeBlockConstruct
 
 // OMP SECTIONS Directive
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index 2b36b085ae08d..4c4e17c39c03a 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -141,6 +141,67 @@ class OmpWorkshareBlockChecker {
   parser::CharBlock source_;
 };
 
+// 'OmpWorkdistributeBlockChecker' is used to check the validity of the
+// assignment statements and the expressions enclosed in an OpenMP
+// workdistribute construct
+class OmpWorkdistributeBlockChecker {
+public:
+  OmpWorkdistributeBlockChecker(
+  SemanticsContext &context, parser::CharBlock source)
+  : context_{context}, source_{source} {}
+
+  template  bool Pre(const T &) { return true; }
+  template  void Post(const T &) {}
+
+  bool Pre(const parser::AssignmentStmt &assignment) {

[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)

2025-08-20 Thread via llvm-branch-commits

https://github.com/easyonaadit updated 
https://github.com/llvm/llvm-project/pull/150170

>From 11d61337d5df1e139a8523ee0854915b224ef650 Mon Sep 17 00:00:00 2001
From: Aaditya 
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def |  25 ++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp  |  58 +++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl  | 378 +++
 3 files changed, 461 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index f8f55772db8fe..77344b999dd84 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -361,6 +361,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
 BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
 BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
 
+//===--===//
+
+// Wave Reduction builtins.
+
+//===--===//
+
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
+
 
//===--===//
 // R600-NI only builtins.
 
//===--===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp 
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index dad1f95ac710d..7471dc1bb3d50 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void 
CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
   Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
 }
 
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+llvm_unreachable("Unknown BuiltinID for wave reduction");
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+return Intrinsic::amdgcn_wave_reduce_add;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+return Intrinsic::amdgcn_wave_reduce_sub;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+return Intrinsic::amdgcn_wave_reduce_min;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+return Intrinsic::amdgcn_wave_reduce_umin;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+return Intrinsic::amdgcn_wave_reduce_max;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+return Intrinsic::amdgcn_wave_reduce_umax;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+return Intrinsic::amdgcn_wave_reduce_and;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+return Intrinsic::amdgcn_wave_reduce_or;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+return Intrinsic::amdgcn_wave_reduce_xor;
+  }
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
   llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u

[llvm-branch-commits] [llvm] release/21.x: [TailDup] Delay aggressive computed-goto taildup to after RegAlloc. (#150911) (PR #151680)

2025-08-20 Thread Phoebe Wang via llvm-branch-commits

https://github.com/phoebewang approved this pull request.

LGTM.

https://github.com/llvm/llvm-project/pull/151680
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)

2025-08-20 Thread via llvm-branch-commits

https://github.com/easyonaadit updated 
https://github.com/llvm/llvm-project/pull/150170

>From 11d61337d5df1e139a8523ee0854915b224ef650 Mon Sep 17 00:00:00 2001
From: Aaditya 
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def |  25 ++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp  |  58 +++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl  | 378 +++
 3 files changed, 461 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index f8f55772db8fe..77344b999dd84 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -361,6 +361,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
 BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
 BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
 
+//===--===//
+
+// Wave Reduction builtins.
+
+//===--===//
+
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
+
 
//===--===//
 // R600-NI only builtins.
 
//===--===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp 
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index dad1f95ac710d..7471dc1bb3d50 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void 
CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
   Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
 }
 
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+llvm_unreachable("Unknown BuiltinID for wave reduction");
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+return Intrinsic::amdgcn_wave_reduce_add;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+return Intrinsic::amdgcn_wave_reduce_sub;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+return Intrinsic::amdgcn_wave_reduce_min;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+return Intrinsic::amdgcn_wave_reduce_umin;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+return Intrinsic::amdgcn_wave_reduce_max;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+return Intrinsic::amdgcn_wave_reduce_umax;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+return Intrinsic::amdgcn_wave_reduce_and;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+return Intrinsic::amdgcn_wave_reduce_or;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+return Intrinsic::amdgcn_wave_reduce_xor;
+  }
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
   llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u

[llvm-branch-commits] [flang] [flang][openmp] Add Lowering to omp mlir for workdistribute construct (PR #154378)

2025-08-20 Thread via llvm-branch-commits

https://github.com/skc7 ready_for_review 
https://github.com/llvm/llvm-project/pull/154378
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Start using AV_MOV_B64_IMM_PSEUDO (PR #154500)

2025-08-20 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/154500?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#154500** https://app.graphite.dev/github/pr/llvm/llvm-project/154500?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/154500?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#154499** https://app.graphite.dev/github/pr/llvm/llvm-project/154499?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn 
more about https://stacking.dev/?utm_source=stack-comment";>stacking.


https://github.com/llvm/llvm-project/pull/154500
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Start using AV_MOV_B64_IMM_PSEUDO (PR #154500)

2025-08-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/154500
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Start using AV_MOV_B64_IMM_PSEUDO (PR #154500)

2025-08-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/154500

None

>From a90508637fc5a676a5f9aafcfe0d9cbc46ee7303 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 20 Aug 2025 18:21:20 +0900
Subject: [PATCH] AMDGPU: Start using AV_MOV_B64_IMM_PSEUDO

---
 .../Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp  | 22 +++-
 .../AMDGPU/amdgpu-prepare-agpr-alloc.mir  | 53 +++
 .../AMDGPU/av-split-dead-valno-crash.ll   | 52 +-
 .../AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll| 16 +++---
 ...m.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll |  8 +--
 5 files changed, 90 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
index 3b06e9b00ac69..0137b3f5943d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
@@ -34,6 +34,8 @@ class AMDGPUPrepareAGPRAllocImpl {
   const SIInstrInfo &TII;
   MachineRegisterInfo &MRI;
 
+  bool isAV64Imm(const MachineOperand &MO) const;
+
 public:
   AMDGPUPrepareAGPRAllocImpl(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
   : TII(*ST.getInstrInfo()), MRI(MRI) {}
@@ -85,11 +87,16 @@ AMDGPUPrepareAGPRAllocPass::run(MachineFunction &MF,
   return PreservedAnalyses::all();
 }
 
+bool AMDGPUPrepareAGPRAllocImpl::isAV64Imm(const MachineOperand &MO) const {
+  return MO.isImm() && TII.isLegalAV64PseudoImm(MO.getImm());
+}
+
 bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) {
   if (MRI.isReserved(AMDGPU::AGPR0))
 return false;
 
-  const MCInstrDesc &AVImmPseudo = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO);
+  const MCInstrDesc &AVImmPseudo32 = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO);
+  const MCInstrDesc &AVImmPseudo64 = TII.get(AMDGPU::AV_MOV_B64_IMM_PSEUDO);
 
   bool Changed = false;
   for (MachineBasicBlock &MBB : MF) {
@@ -98,8 +105,19 @@ bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) {
TII.isInlineConstant(MI, 1)) ||
   (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
MI.getOperand(1).isImm())) {
-MI.setDesc(AVImmPseudo);
+MI.setDesc(AVImmPseudo32);
+Changed = true;
+continue;
+  }
+
+  // TODO: If only half of the value is rewritable, is it worth splitting 
it
+  // up?
+  if ((MI.getOpcode() == AMDGPU::V_MOV_B64_e64 ||
+   MI.getOpcode() == AMDGPU::V_MOV_B64_PSEUDO) &&
+  isAV64Imm(MI.getOperand(1))) {
+MI.setDesc(AVImmPseudo64);
 Changed = true;
+continue;
   }
 }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir 
b/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
index d277c8104fe44..aaacf1d6f793b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
@@ -114,22 +114,22 @@ body: |
   ; HAS-AGPR-NEXT:   liveins: $vgpr0_vgpr1
   ; HAS-AGPR-NEXT: {{  $}}
   ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 
$vgpr0_vgpr1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 54, 
implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 64, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_3:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 %stack.0, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
65, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
279172874240, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
279172874305, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
4290672329938, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
-9223372036854775808, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
1042479491, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B7:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
4477415320595726336, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B8:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
$vgpr0_vgpr1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B9:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
%stack.0, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 54, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_1:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 1, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_2:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 64, implicit $exec
+  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 %stack.0, implicit $exec
+  ; HAS-AGPR-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 65, 
implicit $

[llvm-branch-commits] [llvm] AMDGPU: Start using AV_MOV_B64_IMM_PSEUDO (PR #154500)

2025-08-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/154500.diff


5 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp (+20-2) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir (+31-22) 
- (modified) llvm/test/CodeGen/AMDGPU/av-split-dead-valno-crash.ll (+27-25) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll (+8-8) 
- (modified) 
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll (+4-4) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
index 3b06e9b00ac69..0137b3f5943d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp
@@ -34,6 +34,8 @@ class AMDGPUPrepareAGPRAllocImpl {
   const SIInstrInfo &TII;
   MachineRegisterInfo &MRI;
 
+  bool isAV64Imm(const MachineOperand &MO) const;
+
 public:
   AMDGPUPrepareAGPRAllocImpl(const GCNSubtarget &ST, MachineRegisterInfo &MRI)
   : TII(*ST.getInstrInfo()), MRI(MRI) {}
@@ -85,11 +87,16 @@ AMDGPUPrepareAGPRAllocPass::run(MachineFunction &MF,
   return PreservedAnalyses::all();
 }
 
+bool AMDGPUPrepareAGPRAllocImpl::isAV64Imm(const MachineOperand &MO) const {
+  return MO.isImm() && TII.isLegalAV64PseudoImm(MO.getImm());
+}
+
 bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) {
   if (MRI.isReserved(AMDGPU::AGPR0))
 return false;
 
-  const MCInstrDesc &AVImmPseudo = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO);
+  const MCInstrDesc &AVImmPseudo32 = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO);
+  const MCInstrDesc &AVImmPseudo64 = TII.get(AMDGPU::AV_MOV_B64_IMM_PSEUDO);
 
   bool Changed = false;
   for (MachineBasicBlock &MBB : MF) {
@@ -98,8 +105,19 @@ bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) {
TII.isInlineConstant(MI, 1)) ||
   (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
MI.getOperand(1).isImm())) {
-MI.setDesc(AVImmPseudo);
+MI.setDesc(AVImmPseudo32);
+Changed = true;
+continue;
+  }
+
+  // TODO: If only half of the value is rewritable, is it worth splitting 
it
+  // up?
+  if ((MI.getOpcode() == AMDGPU::V_MOV_B64_e64 ||
+   MI.getOpcode() == AMDGPU::V_MOV_B64_PSEUDO) &&
+  isAV64Imm(MI.getOperand(1))) {
+MI.setDesc(AVImmPseudo64);
 Changed = true;
+continue;
   }
 }
   }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir 
b/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
index d277c8104fe44..aaacf1d6f793b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir
@@ -114,22 +114,22 @@ body: |
   ; HAS-AGPR-NEXT:   liveins: $vgpr0_vgpr1
   ; HAS-AGPR-NEXT: {{  $}}
   ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e64 
$vgpr0_vgpr1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 54, 
implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 64, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_3:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 %stack.0, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
65, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
279172874240, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B3:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
279172874305, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B4:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
4290672329938, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B5:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
-9223372036854775808, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B6:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
1042479491, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B7:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
4477415320595726336, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B8:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
$vgpr0_vgpr1, implicit $exec
-  ; HAS-AGPR-NEXT:   [[V_MOV_B9:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 
%stack.0, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 54, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_1:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 1, implicit $exec
+  ; HAS-AGPR-NEXT:   [[AV_MOV_2:%[0-9]+]]:vreg_64_align2 = 
AV_MOV_B64_IMM_PSEUDO 64, implicit $exec
+  ; HAS-AGPR-NEXT:   [[V_MOV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = 
V_MOV_B64_e64 %stack.0, implicit $exec
+  ; HAS-AGPR-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 65, 
implicit $exec
+  ; HAS-AGPR-NEXT:   [[V_MOV_B1:%[0-9]+]]:vreg_

[llvm-branch-commits] [llvm] release/21.x: [TailDup] Delay aggressive computed-goto taildup to after RegAlloc. (#150911) (PR #151680)

2025-08-20 Thread Florian Hahn via llvm-branch-commits

https://github.com/fhahn updated 
https://github.com/llvm/llvm-project/pull/151680

>From c587c24db5cf31a0c45a475b05b20b4b724b0cec Mon Sep 17 00:00:00 2001
From: Florian Hahn 
Date: Mon, 28 Jul 2025 09:37:00 +0100
Subject: [PATCH] [TailDup] Delay aggressive computed-goto taildup to after
 RegAlloc. (#150911)

Back-ports additional tests (eb9febb4a6b0, dc697de12792), refactoring
(43c9c14577db) and functional change (18f1369297f4) in a single PR.

https://github.com/llvm/llvm-project/pull/114990 allowed more aggressive
tail duplication for computed-gotos in both pre- and post-regalloc tail
duplication.

In some cases, performing tail-duplication too early can lead to worse
results, especially if we duplicate blocks with a number of phi nodes.

This is causing a ~3% performance regression in some workloads using
Python 3.12.

This patch updates TailDup to delay aggressive tail-duplication for
computed gotos to after register allocation.

This means we can keep the non-duplicated version for a bit longer
throughout the backend, which should reduce compile-time as well as
allow a number of optimizations and simplifications to trigger before
drastically expanding the CFG.

For the case in https://github.com/llvm/llvm-project/issues/106846, I
get the same performance with and without this patch on Skylake.

PR: https://github.com/llvm/llvm-project/pull/150911
---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h |   9 +-
 llvm/lib/CodeGen/TailDuplicator.cpp   |  23 ++-
 .../AArch64/late-taildup-computed-goto.ll | 143 ++
 ...o.mir => early-tail-dup-computed-goto.mir} |   4 +-
 .../X86/late-tail-dup-computed-goto.mir   | 128 
 5 files changed, 295 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/late-taildup-computed-goto.ll
 rename llvm/test/CodeGen/X86/{tail-dup-computed-goto.mir => 
early-tail-dup-computed-goto.mir} (99%)
 create mode 100644 llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h 
b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 938d71dd030e8..9e3d9196cc184 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -323,10 +323,11 @@ class MachineBasicBlock
   const MachineFunction *getParent() const { return xParent; }
   MachineFunction *getParent() { return xParent; }
 
-  /// Returns true if the original IR terminator is an `indirectbr`. This
-  /// typically corresponds to a `goto` in C, rather than jump tables.
-  bool terminatorIsComputedGoto() const {
-return back().isIndirectBranch() &&
+  /// Returns true if the original IR terminator is an `indirectbr` with
+  /// successor blocks. This typically corresponds to a `goto` in C, rather 
than
+  /// jump tables.
+  bool terminatorIsComputedGotoWithSuccessors() const {
+return back().isIndirectBranch() && !succ_empty() &&
llvm::all_of(successors(), [](const MachineBasicBlock *Succ) {
  return Succ->isIRBlockAddressTaken();
});
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp 
b/llvm/lib/CodeGen/TailDuplicator.cpp
index a88c57fdc165a..8cbdadd979810 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -604,12 +604,23 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   bool HasComputedGoto = false;
   if (!TailBB.empty()) {
 HasIndirectbr = TailBB.back().isIndirectBranch();
-HasComputedGoto = TailBB.terminatorIsComputedGoto();
+HasComputedGoto = TailBB.terminatorIsComputedGotoWithSuccessors();
   }
 
   if (HasIndirectbr && PreRegAlloc)
 MaxDuplicateCount = TailDupIndirectBranchSize;
 
+  // Allow higher limits when the block has computed-gotos and running after
+  // register allocation. NB. This basically unfactors computed gotos that were
+  // factored early on in the compilation process to speed up edge based data
+  // flow. If we do not unfactor them again, it can seriously pessimize code
+  // with many computed jumps in the source code, such as interpreters.
+  // Therefore we do not restrict the computed gotos.
+  bool DupComputedGotoLate =
+  HasComputedGoto && MF->getTarget().getTargetTriple().isOSDarwin();
+  if (DupComputedGotoLate && !PreRegAlloc)
+MaxDuplicateCount = std::max(MaxDuplicateCount, 10u);
+
   // Check the instructions in the block to determine whether tail-duplication
   // is invalid or unlikely to be profitable.
   unsigned InstrCount = 0;
@@ -663,12 +674,10 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   // Duplicating a BB which has both multiple predecessors and successors will
   // may cause huge amount of PHI nodes. If we want to remove this limitation,
   // we have to address https://github.com/llvm/llvm-project/issues/78578.
-  // NB. This basically unfactors computed gotos that were factored early on in
-  // the compilation process to speed up edge based data flow. If we do not
-

[llvm-branch-commits] [flang] [flang][OpenMP] Semantic checks for DYN_GROUPPRIVATE (PR #154550)

2025-08-20 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/154550

>From e00ef602a8fb143d963ec2ec4264f96544929a34 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Thu, 14 Aug 2025 13:26:23 -0500
Subject: [PATCH 1/2] [flang][OpenMP] Semantic checks for DYN_GROUPPRIVATE

Add checks for non-STRICT values of the prescriptiveness modifier on
clauses that had accepted it prior to the addition of FALLBACK value
(GRAINSIZE and NUM_TASKS).
---
 .../flang/Semantics/openmp-modifiers.h|  1 +
 flang/lib/Semantics/check-omp-structure.cpp   | 68 ++-
 flang/lib/Semantics/openmp-modifiers.cpp  | 19 ++
 .../OpenMP/prescriptiveness-modifier.f90  | 47 +
 4 files changed, 132 insertions(+), 3 deletions(-)
 create mode 100644 flang/test/Semantics/OpenMP/prescriptiveness-modifier.f90

diff --git a/flang/include/flang/Semantics/openmp-modifiers.h 
b/flang/include/flang/Semantics/openmp-modifiers.h
index e0eae984731c7..f74e58a94f768 100644
--- a/flang/include/flang/Semantics/openmp-modifiers.h
+++ b/flang/include/flang/Semantics/openmp-modifiers.h
@@ -67,6 +67,7 @@ template  const OmpModifierDescriptor 
&OmpGetDescriptor();
 #define DECLARE_DESCRIPTOR(name) \
   template <> const OmpModifierDescriptor &OmpGetDescriptor()
 
+DECLARE_DESCRIPTOR(parser::OmpAccessGroup);
 DECLARE_DESCRIPTOR(parser::OmpAlignment);
 DECLARE_DESCRIPTOR(parser::OmpAlignModifier);
 DECLARE_DESCRIPTOR(parser::OmpAllocatorComplexModifier);
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index 2b36b085ae08d..e83a3616d6965 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -471,6 +471,45 @@ void OmpStructureChecker::Enter(const 
parser::OmpClause::Hint &x) {
   }
 }
 
+void OmpStructureChecker::Enter(const parser::OmpClause::DynGroupprivate &x) {
+  CheckAllowedClause(llvm::omp::Clause::OMPC_dyn_groupprivate);
+  parser::CharBlock source{GetContext().clauseSource};
+
+  OmpVerifyModifiers(x.v, llvm::omp::OMPC_dyn_groupprivate, source, context_);
+}
+
+void OmpStructureChecker::Enter(const parser::OmpClause::Grainsize &x) {
+  CheckAllowedClause(llvm::omp::Clause::OMPC_grainsize);
+  parser::CharBlock source{GetContext().clauseSource};
+
+  if (OmpVerifyModifiers(x.v, llvm::omp::OMPC_grainsize, source, context_)) {
+auto &modifiers{OmpGetModifiers(x.v)};
+for (auto *mod :
+OmpGetRepeatableModifier(modifiers)) {
+  if (mod->v != parser::OmpPrescriptiveness::Value::Strict) {
+context_.Say(OmpGetModifierSource(modifiers, mod),
+"Only STRICT is allowed as prescriptiveness on this 
clause"_err_en_US);
+  }
+}
+  }
+}
+
+void OmpStructureChecker::Enter(const parser::OmpClause::NumTasks &x) {
+  CheckAllowedClause(llvm::omp::Clause::OMPC_num_tasks);
+  parser::CharBlock source{GetContext().clauseSource};
+
+  if (OmpVerifyModifiers(x.v, llvm::omp::OMPC_num_tasks, source, context_)) {
+auto &modifiers{OmpGetModifiers(x.v)};
+for (auto *mod :
+OmpGetRepeatableModifier(modifiers)) {
+  if (mod->v != parser::OmpPrescriptiveness::Value::Strict) {
+context_.Say(OmpGetModifierSource(modifiers, mod),
+"Only STRICT is allowed as prescriptiveness on this 
clause"_err_en_US);
+  }
+}
+  }
+}
+
 void OmpStructureChecker::Enter(const parser::OmpDirectiveSpecification &x) {
   // OmpDirectiveSpecification exists on its own only in METADIRECTIVE.
   // In other cases it's a part of other constructs that handle directive
@@ -2542,6 +2581,32 @@ void OmpStructureChecker::Leave(const 
parser::OmpClauseList &) {
 }
   }
 
+  // Default access-group for DYN_GROUPPRIVATE is "cgroup". On a given
+  // construct there can be at most one DYN_GROUPPRIVATE with a given
+  // access-group.
+  const parser::OmpClause
+  *accGrpClause[parser::OmpAccessGroup::Value_enumSize] = {nullptr};
+  for (auto [_, clause] :
+  FindClauses(llvm::omp::Clause::OMPC_dyn_groupprivate)) {
+auto &wrapper{std::get(clause->u)};
+auto &modifiers{OmpGetModifiers(wrapper.v)};
+auto accGrp{parser::OmpAccessGroup::Value::Cgroup};
+if (auto *ag{OmpGetUniqueModifier(modifiers)}) {
+  accGrp = ag->v;
+}
+auto &firstClause{accGrpClause[llvm::to_underlying(accGrp)]};
+if (firstClause) {
+  context_
+  .Say(clause->source,
+  "The access-group modifier can only occur on a single clause in 
a construct"_err_en_US)
+  .Attach(firstClause->source,
+  "Previous clause with access-group modifier"_en_US);
+  break;
+} else {
+  firstClause = clause;
+}
+  }
+
   CheckRequireAtLeastOneOf();
 }
 
@@ -2593,18 +2658,15 @@ CHECK_SIMPLE_CLAUSE(Default, OMPC_default)
 CHECK_SIMPLE_CLAUSE(Depobj, OMPC_depobj)
 CHECK_SIMPLE_CLAUSE(DeviceType, OMPC_device_type)
 CHECK_SIMPLE_CLAUSE(DistSchedule, OMPC_dist_schedule)
-CHECK_SIMPLE_CLAUSE(DynGroupprivate, OMPC_

[llvm-branch-commits] [mlir] release/21.x: [mlir] Make parser not rely on terminating null. (#151007) (PR #154551)

2025-08-20 Thread Mehdi Amini via llvm-branch-commits

https://github.com/joker-eph approved this pull request.


https://github.com/llvm/llvm-project/pull/154551
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libc] [libc][math][c++23] Add nanbf16 math function (PR #153995)

2025-08-20 Thread via llvm-branch-commits

https://github.com/lntue approved this pull request.


https://github.com/llvm/llvm-project/pull/153995
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Michael Klemm via llvm-branch-commits


@@ -4497,6 +4576,22 @@ void OmpStructureChecker::CheckWorkshareBlockStmts(
   }
 }
 
+void OmpStructureChecker::CheckWorkdistributeBlockStmts(
+const parser::Block &block, parser::CharBlock source) {
+  OmpWorkdistributeBlockChecker ompWorkdistributeBlockChecker{context_, 
source};
+
+  for (auto it{block.begin()}; it != block.end(); ++it) {
+if (parser::Unwrap(*it)) {
+  parser::Walk(*it, ompWorkdistributeBlockChecker);
+} else {
+  context_.Say(source,
+  "The structured block in a WORKDISTRIBUTE construct may consist of "
+  "only "
+  "SCALAR or ARRAY assignments"_err_en_US);

mjklemm wrote:

```suggestion
  "The structured block in a `WORKDISTRIBUTE` construct may consist of "
  "only scalar or array assignments"_err_en_US);
```

https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add Lowering to omp mlir for workdistribute construct (PR #154378)

2025-08-20 Thread Michael Klemm via llvm-branch-commits

https://github.com/mjklemm approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/154378
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Michael Klemm via llvm-branch-commits

https://github.com/mjklemm requested changes to this pull request.

Please slightly adjust the commented error message and put the `workdistribute` 
under a version check for OpenMP 6.0.

https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Michael Klemm via llvm-branch-commits

https://github.com/mjklemm edited 
https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [CIR] Add support for string literal lvalues in ConstantLValueEmitter (PR #154360)

2025-08-20 Thread Morris Hafner via llvm-branch-commits

https://github.com/mmha closed https://github.com/llvm/llvm-project/pull/154360
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 3 (PR #151310)

2025-08-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/151310
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Split large loop dependence masks (PR #153187)

2025-08-20 Thread Benjamin Maxwell via llvm-branch-commits


@@ -5248,49 +5248,94 @@ 
AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
  SelectionDAG &DAG) const {
   SDLoc DL(Op);
   uint64_t EltSize = Op.getConstantOperandVal(2);
-  EVT VT = Op.getValueType();
+  EVT FullVT = Op.getValueType();
+  unsigned NumElements = FullVT.getVectorMinNumElements();
+  unsigned NumSplits = 0;
+  EVT EltVT;
   switch (EltSize) {
   case 1:
-if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
-  return SDValue();
+EltVT = MVT::i8;
 break;
   case 2:
-if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
-  return SDValue();
+if (NumElements >= 16)
+  NumSplits = NumElements / 16;
+EltVT = MVT::i16;
 break;
   case 4:
-if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
-  return SDValue();
+if (NumElements >= 8)
+  NumSplits = NumElements / 8;
+EltVT = MVT::i32;
 break;
   case 8:
-if (VT != MVT::v2i32 && VT != MVT::nxv2i1)
-  return SDValue();
+if (NumElements >= 4)
+  NumSplits = NumElements / 4;
+EltVT = MVT::i64;
 break;
   default:
 // Other element sizes are incompatible with whilewr/rw, so expand instead
 return SDValue();
   }
 
-  SDValue PtrA = Op.getOperand(0);
-  SDValue PtrB = Op.getOperand(1);
+  auto LowerToWhile = [&](EVT VT, unsigned AddrScale) {
+SDValue PtrA = Op.getOperand(0);
+SDValue PtrB = Op.getOperand(1);
 
-  if (VT.isScalableVT())
-return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
+EVT StoreVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+   VT.getVectorMinNumElements(), false);
+if (AddrScale > 0) {
+  unsigned Offset = StoreVT.getStoreSizeInBits() / 8 * AddrScale;
+  SDValue Addend;
 
-  // We can use the SVE whilewr/whilerw instruction to lower this
-  // intrinsic by creating the appropriate sequence of scalable vector
-  // operations and then extracting a fixed-width subvector from the scalable
-  // vector. Scalable vector variants are already legal.
-  EVT ContainerVT =
-  EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
-   VT.getVectorNumElements(), true);
-  EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
+  if (VT.isScalableVT())
+Addend = DAG.getVScale(DL, MVT::i64, APInt(64, Offset));
+  else
+Addend = DAG.getConstant(Offset, DL, MVT::i64);
 
-  SDValue Mask =
-  DAG.getNode(Op.getOpcode(), DL, WhileVT, PtrA, PtrB, Op.getOperand(2));
-  SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, DL, ContainerVT, Mask);
-  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, MaskAsInt,
- DAG.getVectorIdxConstant(0, DL));
+  PtrA = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrA, Addend);
+  PtrB = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrB, Addend);
+}
+
+if (VT.isScalableVT())
+  return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
+
+// We can use the SVE whilewr/whilerw instruction to lower this
+// intrinsic by creating the appropriate sequence of scalable vector
+// operations and then extracting a fixed-width subvector from the scalable
+// vector. Scalable vector variants are already legal.
+EVT ContainerVT =
+EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements(), true);
+EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
+
+SDValue Mask =
+DAG.getNode(Op.getOpcode(), DL, WhileVT, PtrA, PtrB, Op.getOperand(2));
+SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, DL, ContainerVT, Mask);
+return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, MaskAsInt,
+   DAG.getVectorIdxConstant(0, DL));
+  };
+
+  if (NumSplits == 0)
+return LowerToWhile(FullVT, 0);
+
+  SDValue FullVec = DAG.getUNDEF(FullVT);

MacDue wrote:

```suggestion
  SDValue FullVec = DAG.getPOISON(FullVT);
```

https://github.com/llvm/llvm-project/pull/153187
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Split large loop dependence masks (PR #153187)

2025-08-20 Thread Benjamin Maxwell via llvm-branch-commits


@@ -5248,49 +5248,94 @@ 
AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
  SelectionDAG &DAG) const {
   SDLoc DL(Op);
   uint64_t EltSize = Op.getConstantOperandVal(2);
-  EVT VT = Op.getValueType();
+  EVT FullVT = Op.getValueType();
+  unsigned NumElements = FullVT.getVectorMinNumElements();
+  unsigned NumSplits = 0;
+  EVT EltVT;
   switch (EltSize) {
   case 1:
-if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
-  return SDValue();
+EltVT = MVT::i8;
 break;
   case 2:
-if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
-  return SDValue();
+if (NumElements >= 16)
+  NumSplits = NumElements / 16;
+EltVT = MVT::i16;
 break;
   case 4:
-if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
-  return SDValue();
+if (NumElements >= 8)
+  NumSplits = NumElements / 8;
+EltVT = MVT::i32;
 break;
   case 8:
-if (VT != MVT::v2i32 && VT != MVT::nxv2i1)
-  return SDValue();
+if (NumElements >= 4)
+  NumSplits = NumElements / 4;
+EltVT = MVT::i64;

MacDue wrote:

I found the underlying pattern here somewhat obfuscated; I think it can be 
simplified quite a bit to: 
```cpp
  uint64_t EltSizeInBytes = Op.getConstantOperandVal(2);

  // Other element sizes are incompatible with whilewr/rw, so expand instead
  if (!is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes))
return SDValue();

  EVT FullVT = Op.getValueType();
  EVT EltVT = MVT::getIntegerVT(EltSizeInBytes * 8);

  unsigned NumElements = FullVT.getVectorMinNumElements();
  unsigned PredElements = getPackedSVEVectorVT(EltVT).getVectorMinNumElements();
  unsigned NumWhiles = NumElements / PredElements;
```


https://github.com/llvm/llvm-project/pull/153187
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Split large loop dependence masks (PR #153187)

2025-08-20 Thread Benjamin Maxwell via llvm-branch-commits


@@ -5248,49 +5248,94 @@ 
AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
  SelectionDAG &DAG) const {
   SDLoc DL(Op);
   uint64_t EltSize = Op.getConstantOperandVal(2);

MacDue wrote:

nit: `EltSize` -> `EltSizeInBytes` (it's not obvious what the units are 
otherwise) 

https://github.com/llvm/llvm-project/pull/153187
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Split large loop dependence masks (PR #153187)

2025-08-20 Thread Benjamin Maxwell via llvm-branch-commits


@@ -5248,49 +5248,94 @@ 
AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
  SelectionDAG &DAG) const {
   SDLoc DL(Op);
   uint64_t EltSize = Op.getConstantOperandVal(2);
-  EVT VT = Op.getValueType();
+  EVT FullVT = Op.getValueType();
+  unsigned NumElements = FullVT.getVectorMinNumElements();
+  unsigned NumSplits = 0;
+  EVT EltVT;
   switch (EltSize) {
   case 1:
-if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
-  return SDValue();
+EltVT = MVT::i8;
 break;
   case 2:
-if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
-  return SDValue();
+if (NumElements >= 16)
+  NumSplits = NumElements / 16;
+EltVT = MVT::i16;
 break;
   case 4:
-if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
-  return SDValue();
+if (NumElements >= 8)
+  NumSplits = NumElements / 8;
+EltVT = MVT::i32;
 break;
   case 8:
-if (VT != MVT::v2i32 && VT != MVT::nxv2i1)
-  return SDValue();
+if (NumElements >= 4)
+  NumSplits = NumElements / 4;
+EltVT = MVT::i64;
 break;
   default:
 // Other element sizes are incompatible with whilewr/rw, so expand instead
 return SDValue();
   }
 
-  SDValue PtrA = Op.getOperand(0);
-  SDValue PtrB = Op.getOperand(1);
+  auto LowerToWhile = [&](EVT VT, unsigned AddrScale) {
+SDValue PtrA = Op.getOperand(0);
+SDValue PtrB = Op.getOperand(1);
 
-  if (VT.isScalableVT())
-return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
+EVT StoreVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+   VT.getVectorMinNumElements(), false);
+if (AddrScale > 0) {
+  unsigned Offset = StoreVT.getStoreSizeInBits() / 8 * AddrScale;
+  SDValue Addend;
 
-  // We can use the SVE whilewr/whilerw instruction to lower this
-  // intrinsic by creating the appropriate sequence of scalable vector
-  // operations and then extracting a fixed-width subvector from the scalable
-  // vector. Scalable vector variants are already legal.
-  EVT ContainerVT =
-  EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
-   VT.getVectorNumElements(), true);
-  EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
+  if (VT.isScalableVT())
+Addend = DAG.getVScale(DL, MVT::i64, APInt(64, Offset));
+  else
+Addend = DAG.getConstant(Offset, DL, MVT::i64);
 
-  SDValue Mask =
-  DAG.getNode(Op.getOpcode(), DL, WhileVT, PtrA, PtrB, Op.getOperand(2));
-  SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, DL, ContainerVT, Mask);
-  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, MaskAsInt,
- DAG.getVectorIdxConstant(0, DL));
+  PtrA = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrA, Addend);
+  PtrB = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrB, Addend);
+}
+
+if (VT.isScalableVT())
+  return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
+
+// We can use the SVE whilewr/whilerw instruction to lower this
+// intrinsic by creating the appropriate sequence of scalable vector
+// operations and then extracting a fixed-width subvector from the scalable
+// vector. Scalable vector variants are already legal.
+EVT ContainerVT =
+EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements(), true);
+EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
+
+SDValue Mask =
+DAG.getNode(Op.getOpcode(), DL, WhileVT, PtrA, PtrB, Op.getOperand(2));
+SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, DL, ContainerVT, Mask);
+return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, MaskAsInt,
+   DAG.getVectorIdxConstant(0, DL));
+  };
+
+  if (NumSplits == 0)
+return LowerToWhile(FullVT, 0);
+
+  SDValue FullVec = DAG.getUNDEF(FullVT);
+
+  unsigned NumElementsPerSplit = NumElements / (2 * NumSplits);
+  EVT PartVT =
+  EVT::getVectorVT(*DAG.getContext(), FullVT.getVectorElementType(),
+   NumElementsPerSplit, FullVT.isScalableVT());
+  for (unsigned Split = 0, InsertIdx = 0; Split < NumSplits;
+   Split++, InsertIdx += 2) {
+SDValue Low = LowerToWhile(PartVT, InsertIdx);
+SDValue High = LowerToWhile(PartVT, InsertIdx + 1);
+unsigned InsertIdxLow = InsertIdx * NumElementsPerSplit;
+unsigned InsertIdxHigh = (InsertIdx + 1) * NumElementsPerSplit;
+SDValue Insert =
+DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FullVT, FullVec, Low,
+DAG.getVectorIdxConstant(InsertIdxLow, DL));
+FullVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FullVT, Insert, High,
+  DAG.getVectorIdxConstant(InsertIdxHigh, DL));
+  }

MacDue wrote:

Following from my first suggestion (that adds `NumWhiles`) this can be 
simplified to:
```cpp
  if (NumWhiles <= 1)
return LowerToWhile(FullVT, 0);

  unsigned NumElementsPerSplit 

[llvm-branch-commits] [llvm] [AArch64] Split large loop dependence masks (PR #153187)

2025-08-20 Thread Benjamin Maxwell via llvm-branch-commits


@@ -5248,49 +5248,94 @@ 
AArch64TargetLowering::LowerLOOP_DEPENDENCE_MASK(SDValue Op,
  SelectionDAG &DAG) const {
   SDLoc DL(Op);
   uint64_t EltSize = Op.getConstantOperandVal(2);
-  EVT VT = Op.getValueType();
+  EVT FullVT = Op.getValueType();
+  unsigned NumElements = FullVT.getVectorMinNumElements();
+  unsigned NumSplits = 0;
+  EVT EltVT;
   switch (EltSize) {
   case 1:
-if (VT != MVT::v16i8 && VT != MVT::nxv16i1)
-  return SDValue();
+EltVT = MVT::i8;
 break;
   case 2:
-if (VT != MVT::v8i8 && VT != MVT::nxv8i1)
-  return SDValue();
+if (NumElements >= 16)
+  NumSplits = NumElements / 16;
+EltVT = MVT::i16;
 break;
   case 4:
-if (VT != MVT::v4i16 && VT != MVT::nxv4i1)
-  return SDValue();
+if (NumElements >= 8)
+  NumSplits = NumElements / 8;
+EltVT = MVT::i32;
 break;
   case 8:
-if (VT != MVT::v2i32 && VT != MVT::nxv2i1)
-  return SDValue();
+if (NumElements >= 4)
+  NumSplits = NumElements / 4;
+EltVT = MVT::i64;
 break;
   default:
 // Other element sizes are incompatible with whilewr/rw, so expand instead
 return SDValue();
   }
 
-  SDValue PtrA = Op.getOperand(0);
-  SDValue PtrB = Op.getOperand(1);
+  auto LowerToWhile = [&](EVT VT, unsigned AddrScale) {
+SDValue PtrA = Op.getOperand(0);
+SDValue PtrB = Op.getOperand(1);
 
-  if (VT.isScalableVT())
-return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
+EVT StoreVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+   VT.getVectorMinNumElements(), false);
+if (AddrScale > 0) {
+  unsigned Offset = StoreVT.getStoreSizeInBits() / 8 * AddrScale;
+  SDValue Addend;
 
-  // We can use the SVE whilewr/whilerw instruction to lower this
-  // intrinsic by creating the appropriate sequence of scalable vector
-  // operations and then extracting a fixed-width subvector from the scalable
-  // vector. Scalable vector variants are already legal.
-  EVT ContainerVT =
-  EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
-   VT.getVectorNumElements(), true);
-  EVT WhileVT = ContainerVT.changeElementType(MVT::i1);
+  if (VT.isScalableVT())
+Addend = DAG.getVScale(DL, MVT::i64, APInt(64, Offset));
+  else
+Addend = DAG.getConstant(Offset, DL, MVT::i64);
 
-  SDValue Mask =
-  DAG.getNode(Op.getOpcode(), DL, WhileVT, PtrA, PtrB, Op.getOperand(2));
-  SDValue MaskAsInt = DAG.getNode(ISD::SIGN_EXTEND, DL, ContainerVT, Mask);
-  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, MaskAsInt,
- DAG.getVectorIdxConstant(0, DL));
+  PtrA = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrA, Addend);
+  PtrB = DAG.getNode(ISD::ADD, DL, MVT::i64, PtrB, Addend);
+}
+
+if (VT.isScalableVT())
+  return DAG.getNode(Op.getOpcode(), DL, VT, PtrA, PtrB, Op.getOperand(2));
+
+// We can use the SVE whilewr/whilerw instruction to lower this
+// intrinsic by creating the appropriate sequence of scalable vector
+// operations and then extracting a fixed-width subvector from the scalable
+// vector. Scalable vector variants are already legal.

MacDue wrote:

This seems to assume SVE is available to lower fixed-size masks. What happens 
if that's not the case?

https://github.com/llvm/llvm-project/pull/153187
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [NFC][Clang][Docs] Update Pointer Authentication documentation (#152596) (PR #154240)

2025-08-20 Thread Kristof Beyls via llvm-branch-commits

kbeyls wrote:

It probably makes sense to backport this to the release branch, but @ojhunt 
would be much better to make that decision. @ojhunt : What do you think about 
merging this back onto the LLVM 21 release branch?

https://github.com/llvm/llvm-project/pull/154240
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [mlir] [Flang][OpenMP][MLIR] Initial declare target to for variables implementation (PR #119589)

2025-08-20 Thread via llvm-branch-commits


@@ -3664,6 +3690,30 @@ struct MapInfoData : MapInfosTy {
 MapInfosTy::append(CurInfo);
   }
 };
+
+enum class TargetDirective : uint32_t {
+  None = 0,
+  Target = 1,
+  TargetData = 2,
+  TargetEnterData = 3,
+  TargetExitData = 4,
+  TargetUpdate = 5
+};

agozillon wrote:

It's never been defined before; this is the only location it has ever been 
defined. The references are because it's layered on top of the other PR that 
also makes alterations to this file that utilise it. The PRs do not function 
without each other and depend on each other, hence the stack. In hindsight, it 
might have been better to introduce this type in the previous PR, or in an 
alternative patch. 

https://github.com/llvm/llvm-project/pull/119589
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Krzysztof Parzyszek via llvm-branch-commits


@@ -4497,6 +4576,22 @@ void OmpStructureChecker::CheckWorkshareBlockStmts(
   }
 }
 
+void OmpStructureChecker::CheckWorkdistributeBlockStmts(
+const parser::Block &block, parser::CharBlock source) {
+  OmpWorkdistributeBlockChecker ompWorkdistributeBlockChecker{context_, 
source};
+
+  for (auto it{block.begin()}; it != block.end(); ++it) {
+if (parser::Unwrap(*it)) {
+  parser::Walk(*it, ompWorkdistributeBlockChecker);
+} else {
+  context_.Say(source,
+  "The structured block in a WORKDISTRIBUTE construct may consist of "
+  "only "
+  "SCALAR or ARRAY assignments"_err_en_US);

kparzysz wrote:

Same thing about concatenating...

https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Krzysztof Parzyszek via llvm-branch-commits


@@ -141,6 +141,67 @@ class OmpWorkshareBlockChecker {
   parser::CharBlock source_;
 };
 
+// 'OmpWorkdistributeBlockChecker' is used to check the validity of the
+// assignment statements and the expressions enclosed in an OpenMP
+// workdistribute construct
+class OmpWorkdistributeBlockChecker {
+public:
+  OmpWorkdistributeBlockChecker(
+  SemanticsContext &context, parser::CharBlock source)
+  : context_{context}, source_{source} {}
+
+  template  bool Pre(const T &) { return true; }
+  template  void Post(const T &) {}
+
+  bool Pre(const parser::AssignmentStmt &assignment) {
+const auto &var{std::get(assignment.t)};
+const auto &expr{std::get(assignment.t)};
+const auto *lhs{GetExpr(context_, var)};
+const auto *rhs{GetExpr(context_, expr)};
+if (lhs && rhs) {
+  Tristate isDefined{semantics::IsDefinedAssignment(
+  lhs->GetType(), lhs->Rank(), rhs->GetType(), rhs->Rank())};
+  if (isDefined == Tristate::Yes) {
+context_.Say(expr.source,
+"Defined assignment statement is not "
+"allowed in a WORKDISTRIBUTE construct"_err_en_US);
+  }
+}
+return true;
+  }
+
+  bool Pre(const parser::Expr &expr) {
+if (const auto *e{GetExpr(context_, expr)}) {
+  for (const Symbol &symbol : evaluate::CollectSymbols(*e)) {
+const Symbol &root{GetAssociationRoot(symbol)};
+if (IsFunction(root)) {
+  std::string attrs{""};
+  if (!IsElementalProcedure(root)) {
+attrs = " non-ELEMENTAL";
+  }
+  if (root.attrs().test(Attr::IMPURE)) {
+if (attrs != "") {
+  attrs = "," + attrs;
+}
+attrs = " IMPURE" + attrs;
+  }
+  if (attrs != "") {
+context_.Say(expr.source,
+"User defined%s function '%s' is not allowed in a "
+"WORKDISTRIBUTE construct"_err_en_US,

kparzysz wrote:

Same here.

https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Krzysztof Parzyszek via llvm-branch-commits


@@ -813,6 +874,13 @@ void OmpStructureChecker::Enter(const 
parser::OpenMPBlockConstruct &x) {
   "TARGET construct with nested TEAMS region contains statements or "
   "directives outside of the TEAMS construct"_err_en_US);
 }
+if (GetContext().directive == llvm::omp::Directive::OMPD_workdistribute &&
+GetContextParent().directive != llvm::omp::Directive::OMPD_teams) {
+  context_.Say(x.BeginDir().DirName().source,
+  "%s region can only be strictly nested within the "
+  "teams region"_err_en_US,

kparzysz wrote:

Please concat message.

https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Krzysztof Parzyszek via llvm-branch-commits


@@ -141,6 +141,67 @@ class OmpWorkshareBlockChecker {
   parser::CharBlock source_;
 };
 
+// 'OmpWorkdistributeBlockChecker' is used to check the validity of the
+// assignment statements and the expressions enclosed in an OpenMP
+// workdistribute construct
+class OmpWorkdistributeBlockChecker {
+public:
+  OmpWorkdistributeBlockChecker(
+  SemanticsContext &context, parser::CharBlock source)
+  : context_{context}, source_{source} {}
+
+  template  bool Pre(const T &) { return true; }
+  template  void Post(const T &) {}
+
+  bool Pre(const parser::AssignmentStmt &assignment) {
+const auto &var{std::get(assignment.t)};
+const auto &expr{std::get(assignment.t)};
+const auto *lhs{GetExpr(context_, var)};
+const auto *rhs{GetExpr(context_, expr)};
+if (lhs && rhs) {
+  Tristate isDefined{semantics::IsDefinedAssignment(
+  lhs->GetType(), lhs->Rank(), rhs->GetType(), rhs->Rank())};
+  if (isDefined == Tristate::Yes) {
+context_.Say(expr.source,
+"Defined assignment statement is not "
+"allowed in a WORKDISTRIBUTE construct"_err_en_US);
+  }
+}
+return true;
+  }
+
+  bool Pre(const parser::Expr &expr) {
+if (const auto *e{GetExpr(context_, expr)}) {

kparzysz wrote:

Consider an early return if `e` is nullptr.

https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][openmp] Add parser/semantic support for workdistribute (PR #154377)

2025-08-20 Thread Krzysztof Parzyszek via llvm-branch-commits


@@ -141,6 +141,67 @@ class OmpWorkshareBlockChecker {
   parser::CharBlock source_;
 };
 
+// 'OmpWorkdistributeBlockChecker' is used to check the validity of the
+// assignment statements and the expressions enclosed in an OpenMP
+// workdistribute construct
+class OmpWorkdistributeBlockChecker {
+public:
+  OmpWorkdistributeBlockChecker(
+  SemanticsContext &context, parser::CharBlock source)
+  : context_{context}, source_{source} {}
+
+  template  bool Pre(const T &) { return true; }
+  template  void Post(const T &) {}
+
+  bool Pre(const parser::AssignmentStmt &assignment) {
+const auto &var{std::get(assignment.t)};
+const auto &expr{std::get(assignment.t)};
+const auto *lhs{GetExpr(context_, var)};
+const auto *rhs{GetExpr(context_, expr)};
+if (lhs && rhs) {
+  Tristate isDefined{semantics::IsDefinedAssignment(
+  lhs->GetType(), lhs->Rank(), rhs->GetType(), rhs->Rank())};
+  if (isDefined == Tristate::Yes) {
+context_.Say(expr.source,
+"Defined assignment statement is not "
+"allowed in a WORKDISTRIBUTE construct"_err_en_US);

kparzysz wrote:

Please concatenate the parts of error messages.  This makes it easier to grep 
sources for a message.

https://github.com/llvm/llvm-project/pull/154377
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


  1   2   >