[llvm-branch-commits] [clang] [libc] release/20.x: [Clang] Add handlers for 'match_any' and 'match_all' to `gpuintrin.h` (#127504) (PR #127704)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

tstellar wrote:

@jhuber6 Why do you want to back port this and what's the impact if we don't?

https://github.com/llvm/llvm-project/pull/127704
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [libc] release/20.x: [Clang] Fix cross-lane scan when given divergent lanes (#127703) (PR #128085)

2025-02-20 Thread Shilei Tian via llvm-branch-commits

shiltian wrote:

LGTM

https://github.com/llvm/llvm-project/pull/128085
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [libc] release/20.x: [Clang] Fix cross-lane scan when given divergent lanes (#127703) (PR #128085)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (llvmbot)


Changes

Backport 6cc7ca084a5bbb7ccf606cab12065604453dde59

Requested by: @jhuber6

---
Full diff: https://github.com/llvm/llvm-project/pull/128085.diff


3 Files Affected:

- (modified) clang/lib/Headers/gpuintrin.h (+49-25) 
- (modified) clang/lib/Headers/nvptxintrin.h (+4-1) 
- (modified) libc/test/integration/src/__support/GPU/scan_reduce.cpp (+49) 


``diff
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 11c87e85cd497..efdc3d94ac0b3 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -150,35 +150,33 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t 
__idx, double __x,
 __builtin_bit_cast(uint64_t, __x), __width));
 }
 
-// Gets the sum of all lanes inside the warp or wavefront.
-#define __DO_LANE_SUM(__type, __suffix)
\
-  _DEFAULT_FN_ATTRS static __inline__ __type __gpu_lane_sum_##__suffix(
\
-  uint64_t __lane_mask, __type __x) {  
\
-for (uint32_t __step = __gpu_num_lanes() / 2; __step > 0; __step /= 2) {   
\
-  uint32_t __index = __step + __gpu_lane_id(); 
\
-  __x += __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x,   
\
-  __gpu_num_lanes());  
\
-}  
\
-return __gpu_read_first_lane_##__suffix(__lane_mask, __x); 
\
-  }
-__DO_LANE_SUM(uint32_t, u32); // uint32_t __gpu_lane_sum_u32(m, x)
-__DO_LANE_SUM(uint64_t, u64); // uint64_t __gpu_lane_sum_u64(m, x)
-__DO_LANE_SUM(float, f32);// float __gpu_lane_sum_f32(m, x)
-__DO_LANE_SUM(double, f64);   // double __gpu_lane_sum_f64(m, x)
-#undef __DO_LANE_SUM
-
 // Gets the accumulator scan of the threads in the warp or wavefront.
 #define __DO_LANE_SCAN(__type, __bitmask_type, __suffix)   
\
   _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_scan_##__suffix( 
\
   uint64_t __lane_mask, uint32_t __x) {
\
-for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) {   
\
-  uint32_t __index = __gpu_lane_id() - __step; 
\
-  __bitmask_type bitmask = __gpu_lane_id() >= __step;  
\
-  __x += __builtin_bit_cast(   
\
-  __type, -bitmask & __builtin_bit_cast(__bitmask_type,
\
-__gpu_shuffle_idx_##__suffix(  
\
-__lane_mask, __index, __x, 
\
-__gpu_num_lanes(;  
\
+uint64_t __first = __lane_mask >> __builtin_ctzll(__lane_mask);
\
+bool __divergent = __gpu_read_first_lane_##__suffix(   
\
+__lane_mask, __first & (__first + 1)); 
\
+if (__divergent) { 
\
+  __type __accum = 0;  
\
+  for (uint64_t __mask = __lane_mask; __mask; __mask &= __mask - 1) {  
\
+__type __index = __builtin_ctzll(__mask);  
\
+__type __tmp = __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x, 
\
+__gpu_num_lanes());
\
+__x = __gpu_lane_id() == __index ? __accum + __tmp : __x;  
\
+__accum += __tmp;  
\
+  }
\
+} else {   
\
+  for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { 
\
+uint32_t __index = __gpu_lane_id() - __step;   
\
+__bitmask_type bitmask = __gpu_lane_id() >= __step;
\
+__x += __builtin_bit_cast( 
\
+__type,
\
+-bitmask & __builtin_bit_cast(__bitmask_type,  
\
+  __gpu_shuffle_idx_##__suffix(
\
+  __lane_mask, __index, __x,   
\
+  __gpu_num_lanes(;
\
+  }
\
 }  
\
 return __x;
\
   }
@@ -188,6 +186,32 @@ _

[llvm-branch-commits] [clang] release/20.x: Revert "[C++20][Modules][Serialization] Delay marking pending incompl… (#127136) (PR #127252)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/127252

>From 99947c59de7b8ecbdda2a8b8ce78abc3083adee0 Mon Sep 17 00:00:00 2001
From: Zixu Wang <9819235+zix...@users.noreply.github.com>
Date: Thu, 13 Feb 2025 16:12:22 -0800
Subject: [PATCH] =?UTF-8?q?Revert=20"[C++20][Modules][Serialization]=20Del?=
 =?UTF-8?q?ay=20marking=20pending=20incompl=E2=80=A6=20(#127136)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

…ete decl chains until the end of `finishPendingActions`. (#121245)"

This reverts commit a9e249f64e800fbb20a3b26c0cfb68c1a1aee5e1.

Reverting this change because of issue #126973.

(cherry picked from commit 912b154f3a3f8c3cebf5cc5731fd8b0749762da5)
---
 clang/lib/Serialization/ASTReader.cpp | 25 +++
 clang/test/Modules/pr121245.cpp   | 93 ---
 2 files changed, 13 insertions(+), 105 deletions(-)
 delete mode 100644 clang/test/Modules/pr121245.cpp

diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 24acd6e297e71..f524251c48ddd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -10186,12 +10186,12 @@ void ASTReader::visitTopLevelModuleMaps(
 }
 
 void ASTReader::finishPendingActions() {
-  while (!PendingIdentifierInfos.empty() ||
- !PendingDeducedFunctionTypes.empty() ||
- !PendingDeducedVarTypes.empty() || !PendingDeclChains.empty() ||
- !PendingMacroIDs.empty() || !PendingDeclContextInfos.empty() ||
- !PendingUpdateRecords.empty() ||
- !PendingObjCExtensionIvarRedeclarations.empty()) {
+  while (
+  !PendingIdentifierInfos.empty() || !PendingDeducedFunctionTypes.empty() 
||
+  !PendingDeducedVarTypes.empty() || !PendingIncompleteDeclChains.empty() 
||
+  !PendingDeclChains.empty() || !PendingMacroIDs.empty() ||
+  !PendingDeclContextInfos.empty() || !PendingUpdateRecords.empty() ||
+  !PendingObjCExtensionIvarRedeclarations.empty()) {
 // If any identifiers with corresponding top-level declarations have
 // been loaded, load those declarations now.
 using TopLevelDeclsMap =
@@ -10239,6 +10239,13 @@ void ASTReader::finishPendingActions() {
 }
 PendingDeducedVarTypes.clear();
 
+// For each decl chain that we wanted to complete while deserializing, mark
+// it as "still needs to be completed".
+for (unsigned I = 0; I != PendingIncompleteDeclChains.size(); ++I) {
+  markIncompleteDeclChain(PendingIncompleteDeclChains[I]);
+}
+PendingIncompleteDeclChains.clear();
+
 // Load pending declaration chains.
 for (unsigned I = 0; I != PendingDeclChains.size(); ++I)
   loadPendingDeclChain(PendingDeclChains[I].first,
@@ -10476,12 +10483,6 @@ void ASTReader::finishPendingActions() {
   for (auto *ND : PendingMergedDefinitionsToDeduplicate)
 getContext().deduplicateMergedDefinitonsFor(ND);
   PendingMergedDefinitionsToDeduplicate.clear();
-
-  // For each decl chain that we wanted to complete while deserializing, mark
-  // it as "still needs to be completed".
-  for (Decl *D : PendingIncompleteDeclChains)
-markIncompleteDeclChain(D);
-  PendingIncompleteDeclChains.clear();
 }
 
 void ASTReader::diagnoseOdrViolations() {
diff --git a/clang/test/Modules/pr121245.cpp b/clang/test/Modules/pr121245.cpp
deleted file mode 100644
index 0e276ad0e435d..0
--- a/clang/test/Modules/pr121245.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// If this test fails, it should be investigated under Debug builds.
-// Before the PR, this test was encountering an `llvm_unreachable()`.
-
-// RUN: rm -rf %t
-// RUN: mkdir -p %t
-// RUN: split-file %s %t
-// RUN: cd %t
-
-// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header %t/hu-01.h \
-// RUN:  -fcxx-exceptions -o %t/hu-01.pcm
-
-// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header %t/hu-02.h \
-// RUN:  -Wno-experimental-header-units -fcxx-exceptions \
-// RUN:  -fmodule-file=%t/hu-01.pcm -o %t/hu-02.pcm
-
-// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header %t/hu-03.h \
-// RUN:  -Wno-experimental-header-units -fcxx-exceptions \
-// RUN:  -fmodule-file=%t/hu-01.pcm -o %t/hu-03.pcm
-
-// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header %t/hu-04.h \
-// RUN:  -Wno-experimental-header-units -fcxx-exceptions \
-// RUN:  -fmodule-file=%t/hu-01.pcm -o %t/hu-04.pcm
-
-// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header %t/hu-05.h \
-// RUN:  -Wno-experimental-header-units -fcxx-exceptions \
-// RUN:  -fmodule-file=%t/hu-03.pcm -fmodule-file=%t/hu-04.pcm \
-// RUN:  -fmodule-file=%t/hu-01.pcm -o %t/hu-05.pcm
-
-// RUN: %clang_cc1 -std=c++20 -emit-obj %t/main.cpp \
-// RUN:  -Wno-experimental-header-units -fcxx-exceptions \
-// RUN:  -fmodule-file=%t/hu-02.pcm -fmodule-file=%t/hu-05.pcm \
-// RUN:  -fmodule-file=%t/hu-04.pcm -fmodule-file=%t/hu-03.pcm \
-// RUN:  -fmodule-file=%t/hu-01.pcm
-
-//--- hu-01.

[llvm-branch-commits] [clang] [libc] release/20.x: [Clang] Fix cross-lane scan when given divergent lanes (#127703) (PR #128085)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:

@shiltian What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/128085
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [libc] release/20.x: [Clang] Fix cross-lane scan when given divergent lanes (#127703) (PR #128085)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/128085
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add some release 20 notes (PR #128136)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/128136
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add some release 20 notes (PR #128136)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/128136

None

>From 0bb2ec6c4fc3c15e17a6c538af04f1668bea9cb6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 21 Feb 2025 12:49:59 +0700
Subject: [PATCH] AMDGPU: Add some release 20 notes

---
 llvm/docs/ReleaseNotes.md | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index c80aecfdea084..e654509792652 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -159,6 +159,17 @@ Changes to the AArch64 Backend
 Changes to the AMDGPU Backend
 -
 
+* Initial support for gfx950
+
+* Improved ``llvm.memcpy``, ``llvm.memmove`` and ``llvm.memset`` lowering
+
+* Fixed expansion of 64-bit flat address space ``atomicrmw`` and
+  ``cmpxchg`` operations which may access private
+  memory. `noalias.addrspace` metadata may be used to avoid the
+  expansion if the target address is known to not be on the stack.
+
+* Fix compile failures when emitting unreachable functions.
+
 * Removed `llvm.amdgcn.flat.atomic.fadd` and
   `llvm.amdgcn.global.atomic.fadd` intrinsics. Users should use the
   {ref}`atomicrmw ` instruction with `fadd` and

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add some release 20 notes (PR #128136)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/128136.diff


1 Files Affected:

- (modified) llvm/docs/ReleaseNotes.md (+11) 


``diff
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index c80aecfdea084..e654509792652 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -159,6 +159,17 @@ Changes to the AArch64 Backend
 Changes to the AMDGPU Backend
 -
 
+* Initial support for gfx950
+
+* Improved ``llvm.memcpy``, ``llvm.memmove`` and ``llvm.memset`` lowering
+
+* Fixed expansion of 64-bit flat address space ``atomicrmw`` and
+  ``cmpxchg`` operations which may access private
+  memory. `noalias.addrspace` metadata may be used to avoid the
+  expansion if the target address is known to not be on the stack.
+
+* Fix compile failures when emitting unreachable functions.
+
 * Removed `llvm.amdgcn.flat.atomic.fadd` and
   `llvm.amdgcn.global.atomic.fadd` intrinsics. Users should use the
   {ref}`atomicrmw ` instruction with `fadd` and

``




https://github.com/llvm/llvm-project/pull/128136
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add some release 20 notes (PR #128136)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm milestoned 
https://github.com/llvm/llvm-project/pull/128136
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] HIP: Use builtin_nan instead of manual expansion (PR #128023)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> why do we want to do this if it is broken for non-literal string?

It's WIP from 3 years ago, I'm putting this up for reference 

https://github.com/llvm/llvm-project/pull/128023
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: Add Wasm, RISC-V, BPF, and NVPTX targets back to Windows release packaging (#127794) (PR #127982)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/127982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: Add Wasm, RISC-V, BPF, and NVPTX targets back to Windows release packaging (#127794) (PR #127982)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/127982

Backport 6e047a5

Requested by: @zmodem

>From 3abcea706bea1b5487eaa94abae417ceaae18423 Mon Sep 17 00:00:00 2001
From: Hans Wennborg 
Date: Thu, 20 Feb 2025 11:02:33 +0100
Subject: [PATCH] Add Wasm, RISC-V, BPF, and NVPTX targets back to Windows
 release packaging (#127794)

In #106059 we reduced the targets to those supported by Windows (X86 and
ARM) to avoid running into size limitations of the NSIS compiler.

Since then, people complained about the lack of Wasm [1], RISC-V [2],
BPF [3], and NVPTX [4]. These do seem to fit in the installer (at least
for 20.1.0-rc2), so let's add them back.

[1]
https://discourse.llvm.org/t/llvm-19-x-release-third-party-binaries/80374/26
[2]
https://discourse.llvm.org/t/llvm-19-x-release-third-party-binaries/80374/53
[3] https://github.com/llvm/llvm-project/issues/127120
[4]
https://github.com/llvm/llvm-project/pull/127794#issuecomment-2668677203

(cherry picked from commit 6e047a5ab42698165a4746ef681396fab1698327)
---
 llvm/utils/release/build_llvm_release.bat | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/release/build_llvm_release.bat 
b/llvm/utils/release/build_llvm_release.bat
index dd041d7d384ec..1c30673cf88bd 100755
--- a/llvm/utils/release/build_llvm_release.bat
+++ b/llvm/utils/release/build_llvm_release.bat
@@ -150,7 +150,7 @@ set common_cmake_flags=^
   -DCMAKE_BUILD_TYPE=Release ^
   -DLLVM_ENABLE_ASSERTIONS=OFF ^
   -DLLVM_INSTALL_TOOLCHAIN_ONLY=ON ^
-  -DLLVM_TARGETS_TO_BUILD="AArch64;ARM;X86" ^
+  -DLLVM_TARGETS_TO_BUILD="AArch64;ARM;X86;BPF;WebAssembly;RISCV;NVPTX" ^
   -DLLVM_BUILD_LLVM_C_DYLIB=ON ^
   -DCMAKE_INSTALL_UCRT_LIBRARIES=ON ^
   -DPython3_FIND_REGISTRY=NEVER ^

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: Add Wasm, RISC-V, BPF, and NVPTX targets back to Windows release packaging (#127794) (PR #127982)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:

@tstellar What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/127982
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [RegAlloc][NewPM] Plug Greedy RA in codegen pipeline (PR #120557)

2025-02-20 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/120557

>From 72fe9343aaaffb5e1f8f242def12a6ba0070b106 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 11 Feb 2025 12:36:40 +
Subject: [PATCH 1/5] [CodeGen][NewPM] Plug greedy RA in codegen pipeline

---
 llvm/include/llvm/Passes/CodeGenPassBuilder.h | 51 ++-
 .../llvm/Passes/MachinePassRegistry.def   |  4 +-
 .../include/llvm/Target/CGPassBuilderOption.h |  4 +-
 llvm/lib/Passes/PassBuilder.cpp   | 14 +
 ...plicit-def-remat-requires-impdef-check.mir |  1 +
 ...implicit-def-with-impdef-greedy-assert.mir |  1 +
 llvm/test/CodeGen/AArch64/pr51516.mir |  1 +
 llvm/test/CodeGen/AArch64/spill-fold.mir  |  2 +
 llvm/test/CodeGen/MIR/Generic/runPass.mir |  1 +
 .../SystemZ/clear-liverange-spillreg.mir  |  1 +
 llvm/test/CodeGen/Thumb/high-reg-clobber.mir  |  1 +
 llvm/test/CodeGen/X86/limit-split-cost.mir|  1 +
 llvm/tools/llc/NewPMDriver.cpp| 15 --
 13 files changed, 75 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index ca065d67eacef..d895eee9bf4da 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1062,7 +1062,9 @@ void CodeGenPassBuilder::addMachineSSAOptimization(
 ///
 /// A target that uses the standard regalloc pass order for fast or optimized
 /// allocation may still override this for per-target regalloc
-/// selection. But -regalloc=... always takes precedence.
+/// selection. But -regalloc-npm=... always takes precedence.
+/// If a target does not want to allow users to set -regalloc-npm=... at all,
+/// check if Opt.RegAlloc == RegAllocType::Unset.
 template 
 void CodeGenPassBuilder::addTargetRegisterAllocator(
 AddMachinePass &addPass, bool Optimized) const {
@@ -1075,10 +1077,29 @@ void CodeGenPassBuilder::addTargetRegisterAllocator(
 /// Find and instantiate the register allocation pass requested by this target
 /// at the current optimization level.  Different register allocators are
 /// defined as separate passes because they may require different analysis.
+///
+/// This helper ensures that the -regalloc-npm= option is always available,
+/// even for targets that override the default allocator.
 template 
 void CodeGenPassBuilder::addRegAllocPass(
 AddMachinePass &addPass, bool Optimized) const {
-  // TODO: Parse Opt.RegAlloc to add register allocator.
+  // Use the specified -regalloc-npm={basic|greedy|fast|pbqp}
+  if (Opt.RegAlloc > RegAllocType::Default) {
+switch (Opt.RegAlloc) {
+case RegAllocType::Fast:
+  addPass(RegAllocFastPass());
+  break;
+case RegAllocType::Greedy:
+  addPass(RAGreedyPass());
+  break;
+default:
+  report_fatal_error("register allocator not supported yet.", false);
+}
+return;
+  }
+  // -regalloc=default or unspecified, so pick based on the optimization level
+  // or ask the target for the regalloc pass.
+  derived().addTargetRegisterAllocator(addPass, Optimized);
 }
 
 template 
@@ -1149,20 +1170,22 @@ void CodeGenPassBuilder::addOptimizedRegAlloc(
   // PreRA instruction scheduling.
   addPass(MachineSchedulerPass(&TM));
 
-  if (derived().addRegAssignmentOptimized(addPass)) {
-// Allow targets to expand pseudo instructions depending on the choice of
-// registers before MachineCopyPropagation.
-derived().addPostRewrite(addPass);
+  if (auto E = derived().addRegAssignmentOptimized(addPass)) {
+// addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
+return;
+  }
+  // Allow targets to expand pseudo instructions depending on the choice of
+  // registers before MachineCopyPropagation.
+  derived().addPostRewrite(addPass);
 
-// Copy propagate to forward register uses and try to eliminate COPYs that
-// were not coalesced.
-addPass(MachineCopyPropagationPass());
+  // Copy propagate to forward register uses and try to eliminate COPYs that
+  // were not coalesced.
+  addPass(MachineCopyPropagationPass());
 
-// Run post-ra machine LICM to hoist reloads / remats.
-//
-// FIXME: can this move into MachineLateOptimization?
-addPass(MachineLICMPass());
-  }
+  // Run post-ra machine LICM to hoist reloads / remats.
+  //
+  // FIXME: can this move into MachineLateOptimization?
+  addPass(MachineLICMPass());
 }
 
 //===-===//
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 45abbc3b02e75..3199337f065fb 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -195,12 +195,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
 },
 "filter=reg-filter;no-clear-vregs")
 
+// 'all' is the default filter
 MACHINE_FUNCTION_PASS_WITH_PARAMS(

[llvm-branch-commits] [llvm] [RegAllocFast][NPM] Make RegAllocFastPassOptions a nested class (PR #127984)

2025-02-20 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan created 
https://github.com/llvm/llvm-project/pull/127984

Making all reg alloc classes have an `::Options` class makes it nicer to 
construct them.

>From fd718fac50a728bd4b5312689b64a2f519b1e27a Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 18 Feb 2025 04:55:35 +
Subject: [PATCH] [RegAlloc][NPM] Make RegAllocFastPassOptions a nested class

Making all reg alloc classes have an `::Option` class makes things nicer
to construct them.
---
 llvm/include/llvm/CodeGen/RegAllocFast.h  | 24 +++
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/Passes/PassBuilder.cpp   |  4 ++--
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/RegAllocFast.h 
b/llvm/include/llvm/CodeGen/RegAllocFast.h
index b2ca9e10bf464..015b666400e05 100644
--- a/llvm/include/llvm/CodeGen/RegAllocFast.h
+++ b/llvm/include/llvm/CodeGen/RegAllocFast.h
@@ -9,23 +9,24 @@
 #ifndef LLVM_CODEGEN_REGALLOCFAST_H
 #define LLVM_CODEGEN_REGALLOCFAST_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/RegAllocCommon.h"
 
 namespace llvm {
 
-struct RegAllocFastPassOptions {
-  RegAllocFilterFunc Filter = nullptr;
-  StringRef FilterName = "all";
-  bool ClearVRegs = true;
-};
-
 class RegAllocFastPass : public PassInfoMixin {
-  RegAllocFastPassOptions Opts;
-
 public:
-  RegAllocFastPass(RegAllocFastPassOptions Opts = RegAllocFastPassOptions())
-  : Opts(Opts) {}
+  struct Options {
+RegAllocFilterFunc Filter;
+StringRef FilterName;
+bool ClearVRegs;
+Options(RegAllocFilterFunc F = nullptr, StringRef FN = "all",
+bool CV = true)
+: Filter(F), FilterName(FN), ClearVRegs(CV) {}
+  };
+
+  RegAllocFastPass(Options Opts = Options()) : Opts(Opts) {}
 
   MachineFunctionProperties getRequiredProperties() const {
 return MachineFunctionProperties().set(
@@ -52,6 +53,9 @@ class RegAllocFastPass : public 
PassInfoMixin {
  function_ref MapClassName2PassName);
 
   static bool isRequired() { return true; }
+
+private:
+  Options Opts;
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index bb1a59a9c4ed3..31f260c560dd6 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -189,7 +189,7 @@ MACHINE_FUNCTION_PASS("verify", 
MachineTraceMetricsVerifi
 #endif
 MACHINE_FUNCTION_PASS_WITH_PARAMS(
 "regallocfast", "RegAllocFastPass",
-[](RegAllocFastPassOptions Opts) { return RegAllocFastPass(Opts); },
+[](RegAllocFastPass::Options Opts) { return RegAllocFastPass(Opts); },
 [PB = this](StringRef Params) {
   return parseRegAllocFastPassOptions(*PB, Params);
 },
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 4e21ef0704e5d..614cbe85ff2bd 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1333,9 +1333,9 @@ Expected> 
parseInternalizeGVs(StringRef Params) {
   return Expected>(std::move(PreservedGVs));
 }
 
-Expected
+Expected
 parseRegAllocFastPassOptions(PassBuilder &PB, StringRef Params) {
-  RegAllocFastPassOptions Opts;
+  RegAllocFastPass::Options Opts;
   while (!Params.empty()) {
 StringRef ParamName;
 std::tie(ParamName, Params) = Params.split(';');

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] e082cbf - Revert "[clang] Lower modf builtin using `llvm.modf` intrinsic (#126750)"

2025-02-20 Thread via llvm-branch-commits

Author: Benjamin Maxwell
Date: 2025-02-20T10:24:31Z
New Revision: e082cbf5fd4f889762e12062d2544f862339b959

URL: 
https://github.com/llvm/llvm-project/commit/e082cbf5fd4f889762e12062d2544f862339b959
DIFF: 
https://github.com/llvm/llvm-project/commit/e082cbf5fd4f889762e12062d2544f862339b959.diff

LOG: Revert "[clang] Lower modf builtin using `llvm.modf` intrinsic (#126750)"

This reverts commit d804c838933b1f35ae56343afac669ffe3bbd957.

Added: 


Modified: 
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/X86/math-builtins.c
clang/test/CodeGen/aix-builtin-mapping.c
clang/test/CodeGen/builtin-attributes.c
clang/test/CodeGen/math-builtins-long.c
clang/test/CodeGen/math-libcalls.c

Removed: 




diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a73ba1ff138fb..4625bf8088be6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -859,24 +859,6 @@ static void emitSincosBuiltin(CodeGenFunction &CGF, const 
CallExpr *E,
   StoreCos->setMetadata(LLVMContext::MD_noalias, AliasScopeList);
 }
 
-static llvm::Value *emitModfBuiltin(CodeGenFunction &CGF, const CallExpr *E,
-llvm::Intrinsic::ID IntrinsicID) {
-  llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(0));
-  llvm::Value *IntPartDest = CGF.EmitScalarExpr(E->getArg(1));
-
-  llvm::Value *Call =
-  CGF.Builder.CreateIntrinsic(IntrinsicID, {Val->getType()}, Val);
-
-  llvm::Value *FractionalResult = CGF.Builder.CreateExtractValue(Call, 0);
-  llvm::Value *IntegralResult = CGF.Builder.CreateExtractValue(Call, 1);
-
-  QualType DestPtrType = E->getArg(1)->getType()->getPointeeType();
-  LValue IntegralLV = CGF.MakeNaturalAlignAddrLValue(IntPartDest, DestPtrType);
-  CGF.EmitStoreOfScalar(IntegralResult, IntegralLV);
-
-  return FractionalResult;
-}
-
 /// EmitFAbs - Emit a call to @llvm.fabs().
 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
   Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
@@ -4130,15 +4112,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
   case Builtin::BI__builtin_frexpf128:
   case Builtin::BI__builtin_frexpf16:
 return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));
-  case Builtin::BImodf:
-  case Builtin::BImodff:
-  case Builtin::BImodfl:
-  case Builtin::BI__builtin_modf:
-  case Builtin::BI__builtin_modff:
-  case Builtin::BI__builtin_modfl:
-if (Builder.getIsFPConstrained())
-  break; // TODO: Emit constrained modf intrinsic once one exists.
-return RValue::get(emitModfBuiltin(*this, E, Intrinsic::modf));
   case Builtin::BI__builtin_isgreater:
   case Builtin::BI__builtin_isgreaterequal:
   case Builtin::BI__builtin_isless:

diff  --git a/clang/test/CodeGen/X86/math-builtins.c 
b/clang/test/CodeGen/X86/math-builtins.c
index d5301b7bafd9c..d7bf7d57fba26 100644
--- a/clang/test/CodeGen/X86/math-builtins.c
+++ b/clang/test/CodeGen/X86/math-builtins.c
@@ -38,24 +38,6 @@ void foo(double *d, float f, float *fp, long double *l, int 
*i, const char *c) {
 // NO__ERRNO-NEXT: [[FREXP_F128_0:%.+]] = extractvalue { fp128, i32 } 
[[FREXP_F128]], 0
 
 
-// NO__ERRNO: [[MODF_F64:%.+]] = call { double, double } @llvm.modf.f64(double 
%{{.+}})
-// NO__ERRNO-NEXT: [[MODF_F64_FP:%.+]] = extractvalue { double, double } 
[[MODF_F64]], 0
-// NO__ERRNO-NEXT: [[MODF_F64_IP:%.+]] = extractvalue { double, double } 
[[MODF_F64]], 1
-// NO__ERRNO-NEXT: store double [[MODF_F64_IP]], ptr %{{.+}}, align 8
-
-// NO__ERRNO: [[MODF_F32:%.+]] = call { float, float } @llvm.modf.f32(float 
%{{.+}})
-// NO__ERRNO-NEXT: [[MODF_F32_FP:%.+]] = extractvalue { float, float } 
[[MODF_F32]], 0
-// NO__ERRNO-NEXT: [[MODF_F32_IP:%.+]] = extractvalue { float, float } 
[[MODF_F32]], 1
-// NO__ERRNO-NEXT: store float [[MODF_F32_IP]], ptr %{{.+}}, align 4
-
-// NO__ERRNO: [[MODF_F80:%.+]] = call { x86_fp80, x86_fp80 } 
@llvm.modf.f80(x86_fp80 %{{.+}})
-// NO__ERRNO-NEXT: [[MODF_F80_FP:%.+]] = extractvalue { x86_fp80, x86_fp80 } 
[[MODF_F80]], 0
-// NO__ERRNO-NEXT: [[MODF_F80_IP:%.+]] = extractvalue { x86_fp80, x86_fp80 } 
[[MODF_F80]], 1
-// NO__ERRNO-NEXT: store x86_fp80 [[MODF_F80_IP]], ptr %{{.+}}, align 16
-
-// NO__ERRNO: call fp128 @modff128(fp128 noundef %{{.+}}, ptr noundef %{{.+}})
-
-
 // NO__ERRNO: [[SINCOS_F64:%.+]] = call { double, double } 
@llvm.sincos.f64(double %{{.+}})
 // NO__ERRNO-NEXT: [[SINCOS_F64_0:%.+]] = extractvalue { double, double } 
[[SINCOS_F64]], 0
 // NO__ERRNO-NEXT: [[SINCOS_F64_1:%.+]] = extractvalue { double, double } 
[[SINCOS_F64]], 1
@@ -157,13 +139,13 @@ void foo(double *d, float f, float *fp, long double *l, 
int *i, const char *c) {
 
   __builtin_modf(f,d);   __builtin_modff(f,fp);  __builtin_modfl(f,l); 
__builtin_modff128(f,l);
 
-// NO__ERRNO: declare { double, double } @llvm.modf.f64(double) 
[[READNONE_INTRINSIC]]
-// NO__ER

[llvm-branch-commits] [llvm] [RegAllocFast][NPM] Make RegAllocFastPassOptions a nested class (PR #127984)

2025-02-20 Thread Akshat Oke via llvm-branch-commits

optimisan wrote:

* **#127984** (this)
* **#125351** 
* **#120557**
* **#119540** 
* **#118462** 
* **#117309** 
* **#119181** 
* `main`


https://github.com/llvm/llvm-project/pull/127984
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [RegAllocFast][NPM] Make RegAllocFastPassOptions a nested class (PR #127984)

2025-02-20 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan ready_for_review 
https://github.com/llvm/llvm-project/pull/127984
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NewPM] Port SIOptimizeExecMaskingPreRA to NPM (PR #125351)

2025-02-20 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/125351

>From 9c7ddfe1fd7b93bfa997bc595c7ba3056fa53534 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Sat, 1 Feb 2025 18:21:24 +
Subject: [PATCH 1/2] [AMDGPU][NewPM] Port SIOptimizeExecMaskingPreRA to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 +-
 .../AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 42 ++-
 .../AMDGPU/SIOptimizeExecMaskingPreRA.h   | 24 +++
 .../CodeGen/AMDGPU/collapse-endcf-broken.mir  |  1 +
 ...ask-pre-ra-non-empty-but-used-interval.mir |  1 +
 7 files changed, 62 insertions(+), 13 deletions(-)
 create mode 100644 llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 42392e22643b2..80786c6fefd3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -368,7 +368,7 @@ struct AMDGPUUnifyMetadataPass : 
PassInfoMixin {
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
-void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
+void initializeSIOptimizeExecMaskingPreRALegacyPass(PassRegistry &);
 extern char &SIOptimizeExecMaskingPreRAID;
 
 void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index fd1341e8c91b2..a4504d78c7250 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", 
SILowerSGPRSpillsPass())
 MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
 MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
 MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
+MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", 
SIOptimizeExecMaskingPreRAPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
@@ -130,7 +131,6 @@ DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", 
SIInsertWaitcntsPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", 
SIOptimizeExecMaskingPreRAPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
 // already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7c9377e61230b..dbd126d18785a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -51,6 +51,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
 #include "SIOptimizeExecMasking.h"
+#include "SIOptimizeExecMaskingPreRA.h"
 #include "SIOptimizeVGPRLiveRange.h"
 #include "SIPeepholeSDWA.h"
 #include "SIPreAllocateWWMRegs.h"
@@ -501,7 +502,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSIFoldOperandsLegacyPass(*PR);
   initializeSIPeepholeSDWALegacyPass(*PR);
   initializeSIShrinkInstructionsLegacyPass(*PR);
-  initializeSIOptimizeExecMaskingPreRAPass(*PR);
+  initializeSIOptimizeExecMaskingPreRALegacyPass(*PR);
   initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR);
   initializeSILoadStoreOptimizerLegacyPass(*PR);
   initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp 
b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 31f65d82a4d2b..2a8a398d7429d 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -12,6 +12,7 @@
 ///
 
//===--===//
 
+#include "SIOptimizeExecMaskingPreRA.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -25,7 +26,7 @@ using namespace llvm;
 
 namespace {
 
-class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
+class SIOptimizeExecMaskingPreRA {
 private:
   const SIRegisterInfo *TRI;
   const SIInstrInfo *TII;
@@ -42,11 +43,18 @@ class SIOptimizeExecMaskingPreRA : public 
MachineFunctionPass {
   bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
   bool optimizeElseBranch(MachineBasicBlock &MBB);
 
+public:
+  SIOptimizeExecMaskingPreRA(LiveIntervals *LIS) : LIS(LIS) {}
+  bool run(MachineFunction &MF);
+};
+
+class SIOptimi

[llvm-branch-commits] [llvm] [RegAllocFast][NPM] Make RegAllocFastPassOptions a nested class (PR #127984)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/127984
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [RegAllocFast][NPM] Make RegAllocFastPassOptions a nested class (PR #127984)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-regalloc

Author: Akshat Oke (optimisan)


Changes

Giving every reg alloc class a nested `::Options` class makes them nicer to 
construct.

---
Full diff: https://github.com/llvm/llvm-project/pull/127984.diff


3 Files Affected:

- (modified) llvm/include/llvm/CodeGen/RegAllocFast.h (+14-10) 
- (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+1-1) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+2-2) 


``diff
diff --git a/llvm/include/llvm/CodeGen/RegAllocFast.h 
b/llvm/include/llvm/CodeGen/RegAllocFast.h
index b2ca9e10bf464..015b666400e05 100644
--- a/llvm/include/llvm/CodeGen/RegAllocFast.h
+++ b/llvm/include/llvm/CodeGen/RegAllocFast.h
@@ -9,23 +9,24 @@
 #ifndef LLVM_CODEGEN_REGALLOCFAST_H
 #define LLVM_CODEGEN_REGALLOCFAST_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/RegAllocCommon.h"
 
 namespace llvm {
 
-struct RegAllocFastPassOptions {
-  RegAllocFilterFunc Filter = nullptr;
-  StringRef FilterName = "all";
-  bool ClearVRegs = true;
-};
-
 class RegAllocFastPass : public PassInfoMixin {
-  RegAllocFastPassOptions Opts;
-
 public:
-  RegAllocFastPass(RegAllocFastPassOptions Opts = RegAllocFastPassOptions())
-  : Opts(Opts) {}
+  struct Options {
+RegAllocFilterFunc Filter;
+StringRef FilterName;
+bool ClearVRegs;
+Options(RegAllocFilterFunc F = nullptr, StringRef FN = "all",
+bool CV = true)
+: Filter(F), FilterName(FN), ClearVRegs(CV) {}
+  };
+
+  RegAllocFastPass(Options Opts = Options()) : Opts(Opts) {}
 
   MachineFunctionProperties getRequiredProperties() const {
 return MachineFunctionProperties().set(
@@ -52,6 +53,9 @@ class RegAllocFastPass : public 
PassInfoMixin {
  function_ref MapClassName2PassName);
 
   static bool isRequired() { return true; }
+
+private:
+  Options Opts;
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index bb1a59a9c4ed3..31f260c560dd6 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -189,7 +189,7 @@ MACHINE_FUNCTION_PASS("verify", 
MachineTraceMetricsVerifi
 #endif
 MACHINE_FUNCTION_PASS_WITH_PARAMS(
 "regallocfast", "RegAllocFastPass",
-[](RegAllocFastPassOptions Opts) { return RegAllocFastPass(Opts); },
+[](RegAllocFastPass::Options Opts) { return RegAllocFastPass(Opts); },
 [PB = this](StringRef Params) {
   return parseRegAllocFastPassOptions(*PB, Params);
 },
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 4e21ef0704e5d..614cbe85ff2bd 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1333,9 +1333,9 @@ Expected> 
parseInternalizeGVs(StringRef Params) {
   return Expected>(std::move(PreservedGVs));
 }
 
-Expected
+Expected
 parseRegAllocFastPassOptions(PassBuilder &PB, StringRef Params) {
-  RegAllocFastPassOptions Opts;
+  RegAllocFastPass::Options Opts;
   while (!Params.empty()) {
 StringRef ParamName;
 std::tie(ParamName, Params) = Params.split(';');

``




https://github.com/llvm/llvm-project/pull/127984
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Backport: [clang] Track function template instantiation from definition (#125266) (PR #127777)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar updated 
https://github.com/llvm/llvm-project/pull/127777

>From bcbffe017bdfb03fafa6e3273336949001b72106 Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Wed, 5 Feb 2025 14:12:12 -0300
Subject: [PATCH] Reland: [clang] Track function template instantiation from
 definition (#125266)

This fixes instantiation of definition for friend function templates,
when the declaration found and the one containing the definition
have different template contexts.

In these cases, the function declaration corresponding to the
definition is not available; it may not even be instantiated at all.

So this patch adds a bit which tracks which function template
declaration was instantiated from the member template.
It's used to find which primary template serves as a context
for the purpose of obtaining the template arguments needed
to instantiate the definition.

Fixes #55509
---
 clang/docs/ReleaseNotes.rst   |   1 +
 clang/include/clang/AST/Decl.h|   7 ++
 clang/include/clang/AST/DeclBase.h|  10 +-
 clang/include/clang/AST/DeclTemplate.h|  20 
 clang/lib/AST/Decl.cpp|   1 +
 clang/lib/Sema/SemaTemplateDeduction.cpp  |  17 +--
 clang/lib/Sema/SemaTemplateInstantiate.cpp|   9 +-
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  27 -
 clang/lib/Serialization/ASTReaderDecl.cpp |   1 +
 clang/lib/Serialization/ASTWriterDecl.cpp |   3 +-
 clang/test/SemaTemplate/GH55509.cpp   | 112 ++
 11 files changed, 180 insertions(+), 28 deletions(-)
 create mode 100644 clang/test/SemaTemplate/GH55509.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 03c420bcfd932..96c42b7e3916a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1055,6 +1055,7 @@ Bug Fixes to C++ Support
 - Fix that some dependent immediate expressions did not cause immediate 
escalation (#GH119046)
 - Fixed a substitution bug in transforming CTAD aliases when the type alias 
contains a non-pack template argument
   corresponding to a pack parameter (#GH124715)
+- Clang is now better at keeping track of friend function template instance 
contexts. (#GH55509)
 
 Bug Fixes to AST Handling
 ^
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 9593bab576412..362a2741a0cdd 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -2298,6 +2298,13 @@ class FunctionDecl : public DeclaratorDecl,
 FunctionDeclBits.IsLateTemplateParsed = ILT;
   }
 
+  bool isInstantiatedFromMemberTemplate() const {
+return FunctionDeclBits.IsInstantiatedFromMemberTemplate;
+  }
+  void setInstantiatedFromMemberTemplate(bool Val = true) {
+FunctionDeclBits.IsInstantiatedFromMemberTemplate = Val;
+  }
+
   /// Whether this function is "trivial" in some specialized C++ senses.
   /// Can only be true for default constructors, copy constructors,
   /// copy assignment operators, and destructors.  Not meaningful until
diff --git a/clang/include/clang/AST/DeclBase.h 
b/clang/include/clang/AST/DeclBase.h
index 3bb82c1572ef9..648dae2838e03 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -1780,6 +1780,8 @@ class DeclContext {
 uint64_t HasImplicitReturnZero : 1;
 LLVM_PREFERRED_TYPE(bool)
 uint64_t IsLateTemplateParsed : 1;
+LLVM_PREFERRED_TYPE(bool)
+uint64_t IsInstantiatedFromMemberTemplate : 1;
 
 /// Kind of contexpr specifier as defined by ConstexprSpecKind.
 LLVM_PREFERRED_TYPE(ConstexprSpecKind)
@@ -1830,7 +1832,7 @@ class DeclContext {
   };
 
   /// Number of inherited and non-inherited bits in FunctionDeclBitfields.
-  enum { NumFunctionDeclBits = NumDeclContextBits + 31 };
+  enum { NumFunctionDeclBits = NumDeclContextBits + 32 };
 
   /// Stores the bits used by CXXConstructorDecl. If modified
   /// NumCXXConstructorDeclBits and the accessor
@@ -1841,12 +1843,12 @@ class DeclContext {
 LLVM_PREFERRED_TYPE(FunctionDeclBitfields)
 uint64_t : NumFunctionDeclBits;
 
-/// 20 bits to fit in the remaining available space.
+/// 19 bits to fit in the remaining available space.
 /// Note that this makes CXXConstructorDeclBitfields take
 /// exactly 64 bits and thus the width of NumCtorInitializers
 /// will need to be shrunk if some bit is added to NumDeclContextBitfields,
 /// NumFunctionDeclBitfields or CXXConstructorDeclBitfields.
-uint64_t NumCtorInitializers : 17;
+uint64_t NumCtorInitializers : 16;
 LLVM_PREFERRED_TYPE(bool)
 uint64_t IsInheritingConstructor : 1;
 
@@ -1860,7 +1862,7 @@ class DeclContext {
   };
 
   /// Number of inherited and non-inherited bits in 
CXXConstructorDeclBitfields.
-  enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 20 };
+  enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 19 };
 
   /// Stores the bits used by ObjCMethodDecl

[llvm-branch-commits] [clang] 3007684 - release/20.x: [Clang] Remove the PackExpansion restrictions for rewrite substitution (#127174)

2025-02-20 Thread via llvm-branch-commits

Author: Younan Zhang
Date: 2025-02-20T15:08:46-08:00
New Revision: 3007684f86468c344c5d0b77217b40b33173cb02

URL: 
https://github.com/llvm/llvm-project/commit/3007684f86468c344c5d0b77217b40b33173cb02
DIFF: 
https://github.com/llvm/llvm-project/commit/3007684f86468c344c5d0b77217b40b33173cb02.diff

LOG: release/20.x: [Clang] Remove the PackExpansion restrictions for rewrite 
substitution (#127174)

This backports c08b80eb525a6e6a34d74634bf5181f11ed12984 with a release
note towards 20 so that we could resolve some pains in CTAD.

Added: 


Modified: 
clang/docs/ReleaseNotes.rst
clang/lib/Sema/SemaTemplate.cpp
clang/lib/Sema/SemaTemplateInstantiate.cpp
clang/test/AST/ast-dump-ctad-alias.cpp

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ad1a5e7ae282e..03c420bcfd932 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1053,6 +1053,8 @@ Bug Fixes to C++ Support
   template parameter. Now, such expression can be used with ``static_assert`` 
and ``constexpr``. (#GH123498)
 - Correctly determine the implicit constexprness of lambdas in dependent 
contexts. (#GH97958) (#GH114234)
 - Fix that some dependent immediate expressions did not cause immediate 
escalation (#GH119046)
+- Fixed a substitution bug in transforming CTAD aliases when the type alias 
contains a non-pack template argument
+  corresponding to a pack parameter (#GH124715)
 
 Bug Fixes to AST Handling
 ^

diff  --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 3944c4f67bab9..f4045debf4521 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -4905,7 +4905,7 @@ bool Sema::CheckTemplateTypeArgument(
 [[fallthrough]];
   }
   default: {
-// We allow instantiateing a template with template argument packs when
+// We allow instantiating a template with template argument packs when
 // building deduction guides.
 if (Arg.getKind() == TemplateArgument::Pack &&
 CodeSynthesisContexts.back().Kind ==

diff  --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp 
b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index c45d3ffe2508b..eec56b7493bad 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -1467,6 +1467,18 @@ namespace {
   }
 }
 
+static TemplateArgument
+getTemplateArgumentPackPatternForRewrite(const TemplateArgument &TA) {
+  if (TA.getKind() != TemplateArgument::Pack)
+return TA;
+  assert(TA.pack_size() == 1 &&
+ "unexpected pack arguments in template rewrite");
+  TemplateArgument Arg = *TA.pack_begin();
+  if (Arg.isPackExpansion())
+Arg = Arg.getPackExpansionPattern();
+  return Arg;
+}
+
 /// Transform the given declaration by instantiating a reference to
 /// this declaration.
 Decl *TransformDecl(SourceLocation Loc, Decl *D);
@@ -1624,7 +1636,7 @@ namespace {
   TemplateArgumentLoc Input = SemaRef.getTrivialTemplateArgumentLoc(
   pack, QualType(), SourceLocation{});
   TemplateArgumentLoc Output;
-  if (SemaRef.SubstTemplateArgument(Input, TemplateArgs, Output))
+  if (TransformTemplateArgument(Input, Output, Uneval))
 return true; // fails
   TArgs.push_back(Output.getArgument());
 }
@@ -2036,11 +2048,7 @@ TemplateName TemplateInstantiator::TransformTemplateName(
   if (TemplateArgs.isRewrite()) {
 // We're rewriting the template parameter as a reference to another
 // template parameter.
-if (Arg.getKind() == TemplateArgument::Pack) {
-  assert(Arg.pack_size() == 1 && Arg.pack_begin()->isPackExpansion() &&
- "unexpected pack arguments in template rewrite");
-  Arg = Arg.pack_begin()->getPackExpansionPattern();
-}
+Arg = getTemplateArgumentPackPatternForRewrite(Arg);
 assert(Arg.getKind() == TemplateArgument::Template &&
"unexpected nontype template argument kind in template 
rewrite");
 return Arg.getAsTemplate();
@@ -2121,11 +2129,7 @@ 
TemplateInstantiator::TransformTemplateParmRefExpr(DeclRefExpr *E,
   if (TemplateArgs.isRewrite()) {
 // We're rewriting the template parameter as a reference to another
 // template parameter.
-if (Arg.getKind() == TemplateArgument::Pack) {
-  assert(Arg.pack_size() == 1 && Arg.pack_begin()->isPackExpansion() &&
- "unexpected pack arguments in template rewrite");
-  Arg = Arg.pack_begin()->getPackExpansionPattern();
-}
+Arg = getTemplateArgumentPackPatternForRewrite(Arg);
 assert(Arg.getKind() == TemplateArgument::Expression &&
"unexpected nontype template argument kind in template rewrite");
 // FIXME: This can lead to the same subexpression appearing multiple

[llvm-branch-commits] [clang] release/20.x: [Clang] Remove the PackExpansion restrictions for rewrite substitution (PR #127174)

2025-02-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@zyn0217 (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix. When you are done, please add the 
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/127174
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [Clang] Remove the PackExpansion restrictions for rewrite substitution (PR #127174)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/127174
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Backport: [clang] Track function template instantiation from definition (#125266) (PR #127777)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar updated 
https://github.com/llvm/llvm-project/pull/127777

>From c0f86c988617ab5104d5a95fbcac38fd0a8ee4d7 Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Wed, 5 Feb 2025 14:12:12 -0300
Subject: [PATCH] Reland: [clang] Track function template instantiation from
 definition (#125266)

This fixes instantiation of definition for friend function templates,
when the declaration found and the one containing the definition
have different template contexts.

In these cases, the function declaration corresponding to the
definition is not available; it may not even be instantiated at all.

So this patch adds a bit which tracks which function template
declaration was instantiated from the member template.
It's used to find which primary template serves as a context
for the purpose of obtaining the template arguments needed
to instantiate the definition.

Fixes #55509
---
 clang/docs/ReleaseNotes.rst   |   1 +
 clang/include/clang/AST/Decl.h|   7 ++
 clang/include/clang/AST/DeclBase.h|  10 +-
 clang/include/clang/AST/DeclTemplate.h|  20 
 clang/lib/AST/Decl.cpp|   1 +
 clang/lib/Sema/SemaTemplateDeduction.cpp  |  17 +--
 clang/lib/Sema/SemaTemplateInstantiate.cpp|   9 +-
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  27 -
 clang/lib/Serialization/ASTReaderDecl.cpp |   1 +
 clang/lib/Serialization/ASTWriterDecl.cpp |   3 +-
 clang/test/SemaTemplate/GH55509.cpp   | 112 ++
 11 files changed, 180 insertions(+), 28 deletions(-)
 create mode 100644 clang/test/SemaTemplate/GH55509.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ad1a5e7ae282e..ee161515fe68b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1053,6 +1053,7 @@ Bug Fixes to C++ Support
   template parameter. Now, such expression can be used with ``static_assert`` 
and ``constexpr``. (#GH123498)
 - Correctly determine the implicit constexprness of lambdas in dependent 
contexts. (#GH97958) (#GH114234)
 - Fix that some dependent immediate expressions did not cause immediate 
escalation (#GH119046)
+- Clang is now better at keeping track of friend function template instance 
contexts. (#GH55509)
 
 Bug Fixes to AST Handling
 ^
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 9593bab576412..362a2741a0cdd 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -2298,6 +2298,13 @@ class FunctionDecl : public DeclaratorDecl,
 FunctionDeclBits.IsLateTemplateParsed = ILT;
   }
 
+  bool isInstantiatedFromMemberTemplate() const {
+return FunctionDeclBits.IsInstantiatedFromMemberTemplate;
+  }
+  void setInstantiatedFromMemberTemplate(bool Val = true) {
+FunctionDeclBits.IsInstantiatedFromMemberTemplate = Val;
+  }
+
   /// Whether this function is "trivial" in some specialized C++ senses.
   /// Can only be true for default constructors, copy constructors,
   /// copy assignment operators, and destructors.  Not meaningful until
diff --git a/clang/include/clang/AST/DeclBase.h 
b/clang/include/clang/AST/DeclBase.h
index 3bb82c1572ef9..648dae2838e03 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -1780,6 +1780,8 @@ class DeclContext {
 uint64_t HasImplicitReturnZero : 1;
 LLVM_PREFERRED_TYPE(bool)
 uint64_t IsLateTemplateParsed : 1;
+LLVM_PREFERRED_TYPE(bool)
+uint64_t IsInstantiatedFromMemberTemplate : 1;
 
 /// Kind of contexpr specifier as defined by ConstexprSpecKind.
 LLVM_PREFERRED_TYPE(ConstexprSpecKind)
@@ -1830,7 +1832,7 @@ class DeclContext {
   };
 
   /// Number of inherited and non-inherited bits in FunctionDeclBitfields.
-  enum { NumFunctionDeclBits = NumDeclContextBits + 31 };
+  enum { NumFunctionDeclBits = NumDeclContextBits + 32 };
 
   /// Stores the bits used by CXXConstructorDecl. If modified
   /// NumCXXConstructorDeclBits and the accessor
@@ -1841,12 +1843,12 @@ class DeclContext {
 LLVM_PREFERRED_TYPE(FunctionDeclBitfields)
 uint64_t : NumFunctionDeclBits;
 
-/// 20 bits to fit in the remaining available space.
+/// 19 bits to fit in the remaining available space.
 /// Note that this makes CXXConstructorDeclBitfields take
 /// exactly 64 bits and thus the width of NumCtorInitializers
 /// will need to be shrunk if some bit is added to NumDeclContextBitfields,
 /// NumFunctionDeclBitfields or CXXConstructorDeclBitfields.
-uint64_t NumCtorInitializers : 17;
+uint64_t NumCtorInitializers : 16;
 LLVM_PREFERRED_TYPE(bool)
 uint64_t IsInheritingConstructor : 1;
 
@@ -1860,7 +1862,7 @@ class DeclContext {
   };
 
   /// Number of inherited and non-inherited bits in 
CXXConstructorDeclBitfields.
-  enum { NumCXXConstructorDeclBits = NumFunctionDeclBits + 20 };
+  enum { NumCXXConstructorDeclBits = NumFunctionDeclBits +

[llvm-branch-commits] [clang] release/20.x: Revert "[C++20][Modules][Serialization] Delay marking pending incompl… (#127136) (PR #127252)

2025-02-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@zixu-w (or anyone else): if you would like to add a note about this fix in the 
release notes (completely optional), please reply to this comment with a one- or 
two-sentence description of the fix. When you are done, please add the 
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/127252
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [C++20] [Modules] handling selectAny attribute for vardecl (PR #128114)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-modules

Author: None (llvmbot)


Changes

Backport 24c06a19be7bcf28b37e5eabbe65df95a2c0265a

Requested by: @ChuanqiXu9

---
Full diff: https://github.com/llvm/llvm-project/pull/128114.diff


2 Files Affected:

- (modified) clang/lib/Sema/SemaDecl.cpp (+2-1) 
- (added) clang/test/Modules/pr127943.cppm (+31) 


``diff
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 1ecb9aff5f319..01f09aba8c2ad 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -4803,7 +4803,8 @@ bool Sema::checkVarDeclRedefinition(VarDecl *Old, VarDecl 
*New) {
   (New->getFormalLinkage() == Linkage::Internal || New->isInline() ||
isa(New) ||
New->getDescribedVarTemplate() || New->getNumTemplateParameterLists() ||
-   New->getDeclContext()->isDependentContext())) {
+   New->getDeclContext()->isDependentContext() ||
+   New->hasAttr())) {
 // The previous definition is hidden, and multiple definitions are
 // permitted (in separate TUs). Demote this to a declaration.
 New->demoteThisDefinitionToDeclaration();
diff --git a/clang/test/Modules/pr127943.cppm b/clang/test/Modules/pr127943.cppm
new file mode 100644
index 0..7cc3be6903e6a
--- /dev/null
+++ b/clang/test/Modules/pr127943.cppm
@@ -0,0 +1,31 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/repro.cppm -fdeclspec -emit-module-interface 
-o %t/repro.pcm
+// RUN: %clang_cc1 -std=c++20 %t/source.cpp -fdeclspec -fsyntax-only -verify 
-fprebuilt-module-path=%t
+
+//--- repro_decl.hpp
+#pragma once
+
+extern "C"
+{
+__declspec(selectany) int foo = 0;
+}
+
+//--- repro.cppm
+module;
+#include "repro_decl.hpp"
+
+export module repro;
+
+export inline int func()
+{
+return foo;
+}
+
+//--- source.cpp
+// expected-no-diagnostics
+import repro;
+
+#include "repro_decl.hpp"

``




https://github.com/llvm/llvm-project/pull/128114
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [C++20] [Modules] handling selectAny attribute for vardecl (PR #128114)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (llvmbot)


Changes

Backport 24c06a19be7bcf28b37e5eabbe65df95a2c0265a

Requested by: @ChuanqiXu9

---
Full diff: https://github.com/llvm/llvm-project/pull/128114.diff


2 Files Affected:

- (modified) clang/lib/Sema/SemaDecl.cpp (+2-1) 
- (added) clang/test/Modules/pr127943.cppm (+31) 


``diff
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 1ecb9aff5f319..01f09aba8c2ad 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -4803,7 +4803,8 @@ bool Sema::checkVarDeclRedefinition(VarDecl *Old, VarDecl 
*New) {
   (New->getFormalLinkage() == Linkage::Internal || New->isInline() ||
isa(New) ||
New->getDescribedVarTemplate() || New->getNumTemplateParameterLists() ||
-   New->getDeclContext()->isDependentContext())) {
+   New->getDeclContext()->isDependentContext() ||
+   New->hasAttr())) {
 // The previous definition is hidden, and multiple definitions are
 // permitted (in separate TUs). Demote this to a declaration.
 New->demoteThisDefinitionToDeclaration();
diff --git a/clang/test/Modules/pr127943.cppm b/clang/test/Modules/pr127943.cppm
new file mode 100644
index 0..7cc3be6903e6a
--- /dev/null
+++ b/clang/test/Modules/pr127943.cppm
@@ -0,0 +1,31 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/repro.cppm -fdeclspec -emit-module-interface 
-o %t/repro.pcm
+// RUN: %clang_cc1 -std=c++20 %t/source.cpp -fdeclspec -fsyntax-only -verify 
-fprebuilt-module-path=%t
+
+//--- repro_decl.hpp
+#pragma once
+
+extern "C"
+{
+__declspec(selectany) int foo = 0;
+}
+
+//--- repro.cppm
+module;
+#include "repro_decl.hpp"
+
+export module repro;
+
+export inline int func()
+{
+return foo;
+}
+
+//--- source.cpp
+// expected-no-diagnostics
+import repro;
+
+#include "repro_decl.hpp"

``




https://github.com/llvm/llvm-project/pull/128114
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [C++20] [Modules] handling selectAny attribute for vardecl (PR #128114)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/128114

Backport 24c06a19be7bcf28b37e5eabbe65df95a2c0265a

Requested by: @ChuanqiXu9

>From 9aac12a38047a505bfa969b89f6cda17bf1cdfdf Mon Sep 17 00:00:00 2001
From: Chuanqi Xu 
Date: Fri, 21 Feb 2025 10:34:14 +0800
Subject: [PATCH] [C++20] [Modules] handling selectAny attribute for vardecl

Close https://github.com/llvm/llvm-project/issues/127963

The root cause of the problem seems to be that we didn't realize it
simply.

(cherry picked from commit 24c06a19be7bcf28b37e5eabbe65df95a2c0265a)
---
 clang/lib/Sema/SemaDecl.cpp  |  3 ++-
 clang/test/Modules/pr127943.cppm | 31 +++
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Modules/pr127943.cppm

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 1ecb9aff5f319..01f09aba8c2ad 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -4803,7 +4803,8 @@ bool Sema::checkVarDeclRedefinition(VarDecl *Old, VarDecl 
*New) {
   (New->getFormalLinkage() == Linkage::Internal || New->isInline() ||
isa(New) ||
New->getDescribedVarTemplate() || New->getNumTemplateParameterLists() ||
-   New->getDeclContext()->isDependentContext())) {
+   New->getDeclContext()->isDependentContext() ||
+   New->hasAttr())) {
 // The previous definition is hidden, and multiple definitions are
 // permitted (in separate TUs). Demote this to a declaration.
 New->demoteThisDefinitionToDeclaration();
diff --git a/clang/test/Modules/pr127943.cppm b/clang/test/Modules/pr127943.cppm
new file mode 100644
index 0..7cc3be6903e6a
--- /dev/null
+++ b/clang/test/Modules/pr127943.cppm
@@ -0,0 +1,31 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/repro.cppm -fdeclspec -emit-module-interface 
-o %t/repro.pcm
+// RUN: %clang_cc1 -std=c++20 %t/source.cpp -fdeclspec -fsyntax-only -verify 
-fprebuilt-module-path=%t
+
+//--- repro_decl.hpp
+#pragma once
+
+extern "C"
+{
+__declspec(selectany) int foo = 0;
+}
+
+//--- repro.cppm
+module;
+#include "repro_decl.hpp"
+
+export module repro;
+
+export inline int func()
+{
+return foo;
+}
+
+//--- source.cpp
+// expected-no-diagnostics
+import repro;
+
+#include "repro_decl.hpp"

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [C++20] [Modules] handling selectAny attribute for vardecl (PR #128114)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/128114
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [RegAllocFast][NPM] Make RegAllocFastPassOptions a nested class (PR #127984)

2025-02-20 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/127984

>From 254dabf793e4c6e7cc8a3aedbd8e093b88c583fa Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Tue, 18 Feb 2025 04:55:35 +
Subject: [PATCH] [RegAlloc][NPM] Make RegAllocFastPassOptions a nested class

Giving every reg alloc class a nested `::Options` class makes them nicer
to construct.
---
 llvm/include/llvm/CodeGen/RegAllocFast.h  | 24 +++
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/Passes/PassBuilder.cpp   |  4 ++--
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/RegAllocFast.h 
b/llvm/include/llvm/CodeGen/RegAllocFast.h
index b2ca9e10bf464..015b666400e05 100644
--- a/llvm/include/llvm/CodeGen/RegAllocFast.h
+++ b/llvm/include/llvm/CodeGen/RegAllocFast.h
@@ -9,23 +9,24 @@
 #ifndef LLVM_CODEGEN_REGALLOCFAST_H
 #define LLVM_CODEGEN_REGALLOCFAST_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/RegAllocCommon.h"
 
 namespace llvm {
 
-struct RegAllocFastPassOptions {
-  RegAllocFilterFunc Filter = nullptr;
-  StringRef FilterName = "all";
-  bool ClearVRegs = true;
-};
-
 class RegAllocFastPass : public PassInfoMixin {
-  RegAllocFastPassOptions Opts;
-
 public:
-  RegAllocFastPass(RegAllocFastPassOptions Opts = RegAllocFastPassOptions())
-  : Opts(Opts) {}
+  struct Options {
+RegAllocFilterFunc Filter;
+StringRef FilterName;
+bool ClearVRegs;
+Options(RegAllocFilterFunc F = nullptr, StringRef FN = "all",
+bool CV = true)
+: Filter(F), FilterName(FN), ClearVRegs(CV) {}
+  };
+
+  RegAllocFastPass(Options Opts = Options()) : Opts(Opts) {}
 
   MachineFunctionProperties getRequiredProperties() const {
 return MachineFunctionProperties().set(
@@ -52,6 +53,9 @@ class RegAllocFastPass : public 
PassInfoMixin {
  function_ref MapClassName2PassName);
 
   static bool isRequired() { return true; }
+
+private:
+  Options Opts;
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 373bd047e2395..8de02e951ba52 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -189,7 +189,7 @@ MACHINE_FUNCTION_PASS("verify", 
MachineTraceMetricsVerifi
 #endif
 MACHINE_FUNCTION_PASS_WITH_PARAMS(
 "regallocfast", "RegAllocFastPass",
-[](RegAllocFastPassOptions Opts) { return RegAllocFastPass(Opts); },
+[](RegAllocFastPass::Options Opts) { return RegAllocFastPass(Opts); },
 [PB = this](StringRef Params) {
   return parseRegAllocFastPassOptions(*PB, Params);
 },
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 5bb2e7d0abdd9..3a078985c33e4 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1332,9 +1332,9 @@ Expected> 
parseInternalizeGVs(StringRef Params) {
   return Expected>(std::move(PreservedGVs));
 }
 
-Expected
+Expected
 parseRegAllocFastPassOptions(PassBuilder &PB, StringRef Params) {
-  RegAllocFastPassOptions Opts;
+  RegAllocFastPass::Options Opts;
   while (!Params.empty()) {
 StringRef ParamName;
 std::tie(ParamName, Params) = Params.split(';');

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [libc] release/20.x: [Clang] Fix cross-lane scan when given divergent lanes (#127703) (PR #128085)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: None (llvmbot)


Changes

Backport 6cc7ca084a5bbb7ccf606cab12065604453dde59

Requested by: @jhuber6

---
Full diff: https://github.com/llvm/llvm-project/pull/128085.diff


3 Files Affected:

- (modified) clang/lib/Headers/gpuintrin.h (+49-25) 
- (modified) clang/lib/Headers/nvptxintrin.h (+4-1) 
- (modified) libc/test/integration/src/__support/GPU/scan_reduce.cpp (+49) 


``diff
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 11c87e85cd497..efdc3d94ac0b3 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -150,35 +150,33 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t 
__idx, double __x,
 __builtin_bit_cast(uint64_t, __x), __width));
 }
 
-// Gets the sum of all lanes inside the warp or wavefront.
-#define __DO_LANE_SUM(__type, __suffix)
\
-  _DEFAULT_FN_ATTRS static __inline__ __type __gpu_lane_sum_##__suffix(
\
-  uint64_t __lane_mask, __type __x) {  
\
-for (uint32_t __step = __gpu_num_lanes() / 2; __step > 0; __step /= 2) {   
\
-  uint32_t __index = __step + __gpu_lane_id(); 
\
-  __x += __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x,   
\
-  __gpu_num_lanes());  
\
-}  
\
-return __gpu_read_first_lane_##__suffix(__lane_mask, __x); 
\
-  }
-__DO_LANE_SUM(uint32_t, u32); // uint32_t __gpu_lane_sum_u32(m, x)
-__DO_LANE_SUM(uint64_t, u64); // uint64_t __gpu_lane_sum_u64(m, x)
-__DO_LANE_SUM(float, f32);// float __gpu_lane_sum_f32(m, x)
-__DO_LANE_SUM(double, f64);   // double __gpu_lane_sum_f64(m, x)
-#undef __DO_LANE_SUM
-
 // Gets the accumulator scan of the threads in the warp or wavefront.
 #define __DO_LANE_SCAN(__type, __bitmask_type, __suffix)   
\
   _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_scan_##__suffix( 
\
   uint64_t __lane_mask, uint32_t __x) {
\
-for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) {   
\
-  uint32_t __index = __gpu_lane_id() - __step; 
\
-  __bitmask_type bitmask = __gpu_lane_id() >= __step;  
\
-  __x += __builtin_bit_cast(   
\
-  __type, -bitmask & __builtin_bit_cast(__bitmask_type,
\
-__gpu_shuffle_idx_##__suffix(  
\
-__lane_mask, __index, __x, 
\
-__gpu_num_lanes(;  
\
+uint64_t __first = __lane_mask >> __builtin_ctzll(__lane_mask);
\
+bool __divergent = __gpu_read_first_lane_##__suffix(   
\
+__lane_mask, __first & (__first + 1)); 
\
+if (__divergent) { 
\
+  __type __accum = 0;  
\
+  for (uint64_t __mask = __lane_mask; __mask; __mask &= __mask - 1) {  
\
+__type __index = __builtin_ctzll(__mask);  
\
+__type __tmp = __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x, 
\
+__gpu_num_lanes());
\
+__x = __gpu_lane_id() == __index ? __accum + __tmp : __x;  
\
+__accum += __tmp;  
\
+  }
\
+} else {   
\
+  for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { 
\
+uint32_t __index = __gpu_lane_id() - __step;   
\
+__bitmask_type bitmask = __gpu_lane_id() >= __step;
\
+__x += __builtin_bit_cast( 
\
+__type,
\
+-bitmask & __builtin_bit_cast(__bitmask_type,  
\
+  __gpu_shuffle_idx_##__suffix(
\
+  __lane_mask, __index, __x,   
\
+  __gpu_num_lanes(;
\
+  }
\
 }  
\
 return __x;
\
   }
@@ -188,6 +186,3

[llvm-branch-commits] [clang] release/20.x: Revert "[C++20][Modules][Serialization] Delay marking pending incompl… (#127136) (PR #127252)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/127252
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Form v2f16 minimum3/maximum3 on gfx950 (PR #128123)

2025-02-20 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian approved this pull request.


https://github.com/llvm/llvm-project/pull/128123
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Form v2f16 minimum3/maximum3 on gfx950 (PR #128123)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Feb 21, 12:02 AM EST**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/128123).


https://github.com/llvm/llvm-project/pull/128123
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Form v2f16 minimum3/maximum3 on gfx950 (PR #128123)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/128123

None

>From da446d3f28f3b38d4e36a70ad9e1973f7ad9e707 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Tue, 28 May 2024 12:59:41 +0200
Subject: [PATCH] AMDGPU: Form v2f16 minimum3/maximum3 on gfx950

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp   |   3 +-
 llvm/lib/Target/AMDGPU/VOP3PInstructions.td |   4 +-
 llvm/test/CodeGen/AMDGPU/fmaximum3.ll   | 175 ++--
 llvm/test/CodeGen/AMDGPU/fminimum3.ll   | 175 ++--
 4 files changed, 112 insertions(+), 245 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0b13a53a0c989..6ed09253c51e1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13515,7 +13515,8 @@ static bool supportsMin3Max3(const GCNSubtarget 
&Subtarget, unsigned Opc,
   case ISD::FMINIMUM:
   case ISD::FMAXIMUM:
 return (VT == MVT::f32 && Subtarget.hasMinimum3Maximum3F32()) ||
-   (VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16());
+   (VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16()) ||
+   (VT == MVT::v2f16 && Subtarget.hasMinimum3Maximum3PKF16());
   case ISD::SMAX:
   case ISD::SMIN:
   case ISD::UMAX:
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td 
b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index d5c6e8af109f4..85c047167f1e1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -145,8 +145,8 @@ def : VOP3PSatPat;
 } // End SubtargetPredicate = HasVOP3PInsts
 
 let SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1 in {
-defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", 
VOP3P_Profile>;
-defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", 
VOP3P_Profile>;
+defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", 
VOP3P_Profile, AMDGPUfminimum3>;
+defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", 
VOP3P_Profile, AMDGPUfmaximum3>;
 }
 
 // TODO: Make sure we're doing the right thing with denormals. Note
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll 
b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index f228824ff750e..2a372dffce650 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -1269,9 +1269,7 @@ define half @v_fmaximum3_f16(half %a, half %b, half %c) {
 ; GFX950-LABEL: v_fmaximum3_f16:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v1
-; GFX950-NEXT:s_nop 0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %max0 = call half @llvm.maximum.f16(half %a, half %b)
   %max1 = call half @llvm.maximum.f16(half %max0, half %c)
@@ -1306,9 +1304,7 @@ define half @v_fmaximum3_f16_commute(half %a, half %b, 
half %c) {
 ; GFX950-LABEL: v_fmaximum3_f16_commute:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v1
-; GFX950-NEXT:s_nop 0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v2, v0, v0
+; GFX950-NEXT:v_pk_maximum3_f16 v0, v2, v0, v1
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %max0 = call half @llvm.maximum.f16(half %a, half %b)
   %max1 = call half @llvm.maximum.f16(half %c, half %max0)
@@ -1346,10 +1342,9 @@ define amdgpu_ps i32 @s_fmaximum3_f16(half inreg %a, 
half inreg %b, half inreg %
 ;
 ; GFX950-LABEL: s_fmaximum3_f16:
 ; GFX950:   ; %bb.0:
-; GFX950-NEXT:v_mov_b32_e32 v0, s0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, s1, s1
-; GFX950-NEXT:s_nop 0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, s2, s2
+; GFX950-NEXT:v_mov_b32_e32 v0, s1
+; GFX950-NEXT:v_mov_b32_e32 v1, s2
+; GFX950-NEXT:v_pk_maximum3_f16 v0, s0, v0, v1
 ; GFX950-NEXT:s_nop 0
 ; GFX950-NEXT:v_and_b32_e32 v0, 0x, v0
 ; GFX950-NEXT:s_nop 0
@@ -1392,9 +1387,7 @@ define half @v_fmaximum3_f16_fabs0(half %a, half %b, half 
%c) {
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:v_and_b32_e32 v0, 0x7fff, v0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v1
-; GFX950-NEXT:s_nop 0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %a.fabs = call half @llvm.fabs.f16(half %a)
   %max0 = call half @llvm.maximum.f16(half %a.fabs, half %b)
@@ -1431,9 +1424,7 @@ define half @v_fmaximum3_f16_fabs1(half %a, half %b, half 
%c) {
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:v_and_b32_e32 v1, 0x7fff, v1
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v1
-; GFX950-NEXT:s_nop 0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v2
 ; GFX950-NEXT:s_set

[llvm-branch-commits] [llvm] AMDGPU: Form v2f16 minimum3/maximum3 on gfx950 (PR #128123)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---

Patch is 47.60 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/128123.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+2-1) 
- (modified) llvm/lib/Target/AMDGPU/VOP3PInstructions.td (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/fmaximum3.ll (+54-121) 
- (modified) llvm/test/CodeGen/AMDGPU/fminimum3.ll (+54-121) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0b13a53a0c989..6ed09253c51e1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13515,7 +13515,8 @@ static bool supportsMin3Max3(const GCNSubtarget 
&Subtarget, unsigned Opc,
   case ISD::FMINIMUM:
   case ISD::FMAXIMUM:
 return (VT == MVT::f32 && Subtarget.hasMinimum3Maximum3F32()) ||
-   (VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16());
+   (VT == MVT::f16 && Subtarget.hasMinimum3Maximum3F16()) ||
+   (VT == MVT::v2f16 && Subtarget.hasMinimum3Maximum3PKF16());
   case ISD::SMAX:
   case ISD::SMIN:
   case ISD::UMAX:
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td 
b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index d5c6e8af109f4..85c047167f1e1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -145,8 +145,8 @@ def : VOP3PSatPat;
 } // End SubtargetPredicate = HasVOP3PInsts
 
 let SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1 in {
-defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", 
VOP3P_Profile>;
-defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", 
VOP3P_Profile>;
+defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", 
VOP3P_Profile, AMDGPUfminimum3>;
+defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", 
VOP3P_Profile, AMDGPUfmaximum3>;
 }
 
 // TODO: Make sure we're doing the right thing with denormals. Note
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll 
b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index f228824ff750e..2a372dffce650 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -1269,9 +1269,7 @@ define half @v_fmaximum3_f16(half %a, half %b, half %c) {
 ; GFX950-LABEL: v_fmaximum3_f16:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v1
-; GFX950-NEXT:s_nop 0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %max0 = call half @llvm.maximum.f16(half %a, half %b)
   %max1 = call half @llvm.maximum.f16(half %max0, half %c)
@@ -1306,9 +1304,7 @@ define half @v_fmaximum3_f16_commute(half %a, half %b, 
half %c) {
 ; GFX950-LABEL: v_fmaximum3_f16_commute:
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v1
-; GFX950-NEXT:s_nop 0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v2, v0, v0
+; GFX950-NEXT:v_pk_maximum3_f16 v0, v2, v0, v1
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %max0 = call half @llvm.maximum.f16(half %a, half %b)
   %max1 = call half @llvm.maximum.f16(half %c, half %max0)
@@ -1346,10 +1342,9 @@ define amdgpu_ps i32 @s_fmaximum3_f16(half inreg %a, 
half inreg %b, half inreg %
 ;
 ; GFX950-LABEL: s_fmaximum3_f16:
 ; GFX950:   ; %bb.0:
-; GFX950-NEXT:v_mov_b32_e32 v0, s0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, s1, s1
-; GFX950-NEXT:s_nop 0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, s2, s2
+; GFX950-NEXT:v_mov_b32_e32 v0, s1
+; GFX950-NEXT:v_mov_b32_e32 v1, s2
+; GFX950-NEXT:v_pk_maximum3_f16 v0, s0, v0, v1
 ; GFX950-NEXT:s_nop 0
 ; GFX950-NEXT:v_and_b32_e32 v0, 0x, v0
 ; GFX950-NEXT:s_nop 0
@@ -1392,9 +1387,7 @@ define half @v_fmaximum3_f16_fabs0(half %a, half %b, half 
%c) {
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:v_and_b32_e32 v0, 0x7fff, v0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v1
-; GFX950-NEXT:s_nop 0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %a.fabs = call half @llvm.fabs.f16(half %a)
   %max0 = call half @llvm.maximum.f16(half %a.fabs, half %b)
@@ -1431,9 +1424,7 @@ define half @v_fmaximum3_f16_fabs1(half %a, half %b, half 
%c) {
 ; GFX950:   ; %bb.0:
 ; GFX950-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX950-NEXT:v_and_b32_e32 v1, 0x7fff, v1
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v1
-; GFX950-NEXT:s_nop 0
-; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v2, v2
+; GFX950-NEXT:v_pk_maximum3_f16 v0, v0, v1, v2
 ; GFX950-NEXT:s_setpc_b64 s[30:31]
   %b.fabs = call half @llvm.fabs.f16(half %b)
   %max0 = call half 

[llvm-branch-commits] [llvm] AMDGPU: Form v2f16 minimum3/maximum3 on gfx950 (PR #128123)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

Missed in initial gfx950 upstreaming 

https://github.com/llvm/llvm-project/pull/128123
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Form v2f16 minimum3/maximum3 on gfx950 (PR #128123)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/128123
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Form v2f16 minimum3/maximum3 on gfx950 (PR #128123)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm milestoned 
https://github.com/llvm/llvm-project/pull/128123
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Form v2f16 minimum3/maximum3 on gfx950 (PR #128123)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/128123?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#128123** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/128123?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/128123?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#128121** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/128121?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn 
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/128123
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [libc] release/20.x: [Clang] Add handlers for 'match_any' and 'match_all' to `gpuintrin.h` (#127504) (PR #127704)

2025-02-20 Thread Joseph Huber via llvm-branch-commits

jhuber6 wrote:

> @jhuber6 Why do you want to back port this and what's the impact if we don't?

Sorry, https://github.com/llvm/llvm-project/pull/127703 is actually the 
important one and I forgot to cherry-pick it; it fixes a test and incorrect 
behavior. I figured that if I was backporting that I could merge this as well, 
but if that's too much then it's not a big deal.

https://github.com/llvm/llvm-project/pull/127704
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 99947c5 - Revert "[C++20][Modules][Serialization] Delay marking pending incompl… (#127136)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

Author: Zixu Wang
Date: 2025-02-20T15:11:16-08:00
New Revision: 99947c59de7b8ecbdda2a8b8ce78abc3083adee0

URL: 
https://github.com/llvm/llvm-project/commit/99947c59de7b8ecbdda2a8b8ce78abc3083adee0
DIFF: 
https://github.com/llvm/llvm-project/commit/99947c59de7b8ecbdda2a8b8ce78abc3083adee0.diff

LOG: Revert "[C++20][Modules][Serialization] Delay marking pending incompl… 
(#127136)

…ete decl chains until the end of `finishPendingActions`. (#121245)"

This reverts commit a9e249f64e800fbb20a3b26c0cfb68c1a1aee5e1.

Reverting this change because of issue #126973.

(cherry picked from commit 912b154f3a3f8c3cebf5cc5731fd8b0749762da5)

Added: 


Modified: 
clang/lib/Serialization/ASTReader.cpp

Removed: 
clang/test/Modules/pr121245.cpp



diff  --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 24acd6e297e71..f524251c48ddd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -10186,12 +10186,12 @@ void ASTReader::visitTopLevelModuleMaps(
 }
 
 void ASTReader::finishPendingActions() {
-  while (!PendingIdentifierInfos.empty() ||
- !PendingDeducedFunctionTypes.empty() ||
- !PendingDeducedVarTypes.empty() || !PendingDeclChains.empty() ||
- !PendingMacroIDs.empty() || !PendingDeclContextInfos.empty() ||
- !PendingUpdateRecords.empty() ||
- !PendingObjCExtensionIvarRedeclarations.empty()) {
+  while (
+  !PendingIdentifierInfos.empty() || !PendingDeducedFunctionTypes.empty() 
||
+  !PendingDeducedVarTypes.empty() || !PendingIncompleteDeclChains.empty() 
||
+  !PendingDeclChains.empty() || !PendingMacroIDs.empty() ||
+  !PendingDeclContextInfos.empty() || !PendingUpdateRecords.empty() ||
+  !PendingObjCExtensionIvarRedeclarations.empty()) {
 // If any identifiers with corresponding top-level declarations have
 // been loaded, load those declarations now.
 using TopLevelDeclsMap =
@@ -10239,6 +10239,13 @@ void ASTReader::finishPendingActions() {
 }
 PendingDeducedVarTypes.clear();
 
+// For each decl chain that we wanted to complete while deserializing, mark
+// it as "still needs to be completed".
+for (unsigned I = 0; I != PendingIncompleteDeclChains.size(); ++I) {
+  markIncompleteDeclChain(PendingIncompleteDeclChains[I]);
+}
+PendingIncompleteDeclChains.clear();
+
 // Load pending declaration chains.
 for (unsigned I = 0; I != PendingDeclChains.size(); ++I)
   loadPendingDeclChain(PendingDeclChains[I].first,
@@ -10476,12 +10483,6 @@ void ASTReader::finishPendingActions() {
   for (auto *ND : PendingMergedDefinitionsToDeduplicate)
 getContext().deduplicateMergedDefinitonsFor(ND);
   PendingMergedDefinitionsToDeduplicate.clear();
-
-  // For each decl chain that we wanted to complete while deserializing, mark
-  // it as "still needs to be completed".
-  for (Decl *D : PendingIncompleteDeclChains)
-markIncompleteDeclChain(D);
-  PendingIncompleteDeclChains.clear();
 }
 
 void ASTReader::diagnoseOdrViolations() {

diff  --git a/clang/test/Modules/pr121245.cpp b/clang/test/Modules/pr121245.cpp
deleted file mode 100644
index 0e276ad0e435d..0
--- a/clang/test/Modules/pr121245.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// If this test fails, it should be investigated under Debug builds.
-// Before the PR, this test was encountering an `llvm_unreachable()`.
-
-// RUN: rm -rf %t
-// RUN: mkdir -p %t
-// RUN: split-file %s %t
-// RUN: cd %t
-
-// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header %t/hu-01.h \
-// RUN:  -fcxx-exceptions -o %t/hu-01.pcm
-
-// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header %t/hu-02.h \
-// RUN:  -Wno-experimental-header-units -fcxx-exceptions \
-// RUN:  -fmodule-file=%t/hu-01.pcm -o %t/hu-02.pcm
-
-// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header %t/hu-03.h \
-// RUN:  -Wno-experimental-header-units -fcxx-exceptions \
-// RUN:  -fmodule-file=%t/hu-01.pcm -o %t/hu-03.pcm
-
-// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header %t/hu-04.h \
-// RUN:  -Wno-experimental-header-units -fcxx-exceptions \
-// RUN:  -fmodule-file=%t/hu-01.pcm -o %t/hu-04.pcm
-
-// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header %t/hu-05.h \
-// RUN:  -Wno-experimental-header-units -fcxx-exceptions \
-// RUN:  -fmodule-file=%t/hu-03.pcm -fmodule-file=%t/hu-04.pcm \
-// RUN:  -fmodule-file=%t/hu-01.pcm -o %t/hu-05.pcm
-
-// RUN: %clang_cc1 -std=c++20 -emit-obj %t/main.cpp \
-// RUN:  -Wno-experimental-header-units -fcxx-exceptions \
-// RUN:  -fmodule-file=%t/hu-02.pcm -fmodule-file=%t/hu-05.pcm \
-// RUN:  -fmodule-file=%t/hu-04.pcm -fmodule-file=%t/hu-03.pcm \
-// RUN:  -fmodule-file=%t/hu-01.pcm
-
-//--- hu-01.h
-template 
-struct A {
-  A() {}
-  ~A() {}
-};
-
-template 
-struct EBO : T {
-  EBO() = default;
-};
-
-template 
-struct HT : EBO> {};

[llvm-branch-commits] [clang] [libc] release/20.x: [Clang] Fix cross-lane scan when given divergent lanes (#127703) (PR #128085)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-libc

Author: None (llvmbot)


Changes

Backport 6cc7ca084a5bbb7ccf606cab12065604453dde59

Requested by: @jhuber6

---
Full diff: https://github.com/llvm/llvm-project/pull/128085.diff


3 Files Affected:

- (modified) clang/lib/Headers/gpuintrin.h (+49-25) 
- (modified) clang/lib/Headers/nvptxintrin.h (+4-1) 
- (modified) libc/test/integration/src/__support/GPU/scan_reduce.cpp (+49) 


``diff
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 11c87e85cd497..efdc3d94ac0b3 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -150,35 +150,33 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t 
__idx, double __x,
 __builtin_bit_cast(uint64_t, __x), __width));
 }
 
-// Gets the sum of all lanes inside the warp or wavefront.
-#define __DO_LANE_SUM(__type, __suffix)
\
-  _DEFAULT_FN_ATTRS static __inline__ __type __gpu_lane_sum_##__suffix(
\
-  uint64_t __lane_mask, __type __x) {  
\
-for (uint32_t __step = __gpu_num_lanes() / 2; __step > 0; __step /= 2) {   
\
-  uint32_t __index = __step + __gpu_lane_id(); 
\
-  __x += __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x,   
\
-  __gpu_num_lanes());  
\
-}  
\
-return __gpu_read_first_lane_##__suffix(__lane_mask, __x); 
\
-  }
-__DO_LANE_SUM(uint32_t, u32); // uint32_t __gpu_lane_sum_u32(m, x)
-__DO_LANE_SUM(uint64_t, u64); // uint64_t __gpu_lane_sum_u64(m, x)
-__DO_LANE_SUM(float, f32);// float __gpu_lane_sum_f32(m, x)
-__DO_LANE_SUM(double, f64);   // double __gpu_lane_sum_f64(m, x)
-#undef __DO_LANE_SUM
-
 // Gets the accumulator scan of the threads in the warp or wavefront.
 #define __DO_LANE_SCAN(__type, __bitmask_type, __suffix)   
\
   _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_scan_##__suffix( 
\
   uint64_t __lane_mask, uint32_t __x) {
\
-for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) {   
\
-  uint32_t __index = __gpu_lane_id() - __step; 
\
-  __bitmask_type bitmask = __gpu_lane_id() >= __step;  
\
-  __x += __builtin_bit_cast(   
\
-  __type, -bitmask & __builtin_bit_cast(__bitmask_type,
\
-__gpu_shuffle_idx_##__suffix(  
\
-__lane_mask, __index, __x, 
\
-__gpu_num_lanes(;  
\
+uint64_t __first = __lane_mask >> __builtin_ctzll(__lane_mask);
\
+bool __divergent = __gpu_read_first_lane_##__suffix(   
\
+__lane_mask, __first & (__first + 1)); 
\
+if (__divergent) { 
\
+  __type __accum = 0;  
\
+  for (uint64_t __mask = __lane_mask; __mask; __mask &= __mask - 1) {  
\
+__type __index = __builtin_ctzll(__mask);  
\
+__type __tmp = __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x, 
\
+__gpu_num_lanes());
\
+__x = __gpu_lane_id() == __index ? __accum + __tmp : __x;  
\
+__accum += __tmp;  
\
+  }
\
+} else {   
\
+  for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { 
\
+uint32_t __index = __gpu_lane_id() - __step;   
\
+__bitmask_type bitmask = __gpu_lane_id() >= __step;
\
+__x += __builtin_bit_cast( 
\
+__type,
\
+-bitmask & __builtin_bit_cast(__bitmask_type,  
\
+  __gpu_shuffle_idx_##__suffix(
\
+  __lane_mask, __index, __x,   
\
+  __gpu_num_lanes(;
\
+  }
\
 }  
\
 return __x;
\
   }
@@ -188,6 +186,32 @@ __

[llvm-branch-commits] [clang] [libc] release/20.x: [Clang] Fix cross-lane scan when given divergent lanes (#127703) (PR #128085)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/128085

Backport 6cc7ca084a5bbb7ccf606cab12065604453dde59

Requested by: @jhuber6

>From 41c0aae4c10555d9525640ace71feec808e67276 Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Wed, 19 Feb 2025 16:46:59 -0600
Subject: [PATCH] [Clang] Fix cross-lane scan when given divergent lanes
 (#127703)

Summary:
The scan operation implemented here only works if there are contiguous
ones in the execution mask that can be used to propagate the result.
There are two solutions to this: either enter 'whole-wave-mode' and
forcibly turn them back on, or do this serially. This implementation
does the latter because it's more portable, but checks to see if the
parallel fast-path is applicable.

Needs to be backported for correct behavior and because it fixes a
failing libc test.

(cherry picked from commit 6cc7ca084a5bbb7ccf606cab12065604453dde59)
---
 clang/lib/Headers/gpuintrin.h | 74 ---
 clang/lib/Headers/nvptxintrin.h   |  5 +-
 .../src/__support/GPU/scan_reduce.cpp | 49 
 3 files changed, 102 insertions(+), 26 deletions(-)

diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 11c87e85cd497..efdc3d94ac0b3 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -150,35 +150,33 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t 
__idx, double __x,
 __builtin_bit_cast(uint64_t, __x), __width));
 }
 
-// Gets the sum of all lanes inside the warp or wavefront.
-#define __DO_LANE_SUM(__type, __suffix)
\
-  _DEFAULT_FN_ATTRS static __inline__ __type __gpu_lane_sum_##__suffix(
\
-  uint64_t __lane_mask, __type __x) {  
\
-for (uint32_t __step = __gpu_num_lanes() / 2; __step > 0; __step /= 2) {   
\
-  uint32_t __index = __step + __gpu_lane_id(); 
\
-  __x += __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x,   
\
-  __gpu_num_lanes());  
\
-}  
\
-return __gpu_read_first_lane_##__suffix(__lane_mask, __x); 
\
-  }
-__DO_LANE_SUM(uint32_t, u32); // uint32_t __gpu_lane_sum_u32(m, x)
-__DO_LANE_SUM(uint64_t, u64); // uint64_t __gpu_lane_sum_u64(m, x)
-__DO_LANE_SUM(float, f32);// float __gpu_lane_sum_f32(m, x)
-__DO_LANE_SUM(double, f64);   // double __gpu_lane_sum_f64(m, x)
-#undef __DO_LANE_SUM
-
 // Gets the accumulator scan of the threads in the warp or wavefront.
 #define __DO_LANE_SCAN(__type, __bitmask_type, __suffix)   
\
   _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_scan_##__suffix( 
\
   uint64_t __lane_mask, uint32_t __x) {
\
-for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) {   
\
-  uint32_t __index = __gpu_lane_id() - __step; 
\
-  __bitmask_type bitmask = __gpu_lane_id() >= __step;  
\
-  __x += __builtin_bit_cast(   
\
-  __type, -bitmask & __builtin_bit_cast(__bitmask_type,
\
-__gpu_shuffle_idx_##__suffix(  
\
-__lane_mask, __index, __x, 
\
-__gpu_num_lanes(;  
\
+uint64_t __first = __lane_mask >> __builtin_ctzll(__lane_mask);
\
+bool __divergent = __gpu_read_first_lane_##__suffix(   
\
+__lane_mask, __first & (__first + 1)); 
\
+if (__divergent) { 
\
+  __type __accum = 0;  
\
+  for (uint64_t __mask = __lane_mask; __mask; __mask &= __mask - 1) {  
\
+__type __index = __builtin_ctzll(__mask);  
\
+__type __tmp = __gpu_shuffle_idx_##__suffix(__lane_mask, __index, __x, 
\
+__gpu_num_lanes());
\
+__x = __gpu_lane_id() == __index ? __accum + __tmp : __x;  
\
+__accum += __tmp;  
\
+  }
\
+} else {   
\
+  for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { 
\
+uint32_t __index = __gpu_lane_id() - __step;   
\
+__bitmask_type bitmask = __gpu_lane_id() >= __step;
\
+__x += __builtin_bit_cast( 

[llvm-branch-commits] [BOLT] Fix merge-fdata for memory events (PR #128108)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)


Changes

Don't attempt to parse mispredictions for memory entries in LBR profile.

Test Plan: added merge-fdata-mem-prof.test


---
Full diff: https://github.com/llvm/llvm-project/pull/128108.diff


2 Files Affected:

- (added) bolt/test/merge-fdata-mem-prof.test (+13) 
- (modified) bolt/tools/merge-fdata/merge-fdata.cpp (+10-3) 


``diff
diff --git a/bolt/test/merge-fdata-mem-prof.test 
b/bolt/test/merge-fdata-mem-prof.test
new file mode 100644
index 0..166d6028f7737
--- /dev/null
+++ b/bolt/test/merge-fdata-mem-prof.test
@@ -0,0 +1,13 @@
+## Check that merge-fdata tool correctly handles memory profile
+
+# REQUIRES: system-linux
+
+# RUN: split-file %s %t
+# RUN: merge-fdata %t/a.fdata -o %t/merged.fdata
+# RUN: FileCheck %s --input-file %t/merged.fdata
+
+# CHECK: 4 Curl_cf_def_query c 4 Curl_cft_h1_proxy 68 3
+
+#--- a.fdata
+4 Curl_cf_def_query c 4 Curl_cft_h1_proxy 68 1
+4 Curl_cf_def_query c 4 Curl_cft_h1_proxy 68 2
diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp 
b/bolt/tools/merge-fdata/merge-fdata.cpp
index 74a5f8ca2d477..0cf5a03501728 100644
--- a/bolt/tools/merge-fdata/merge-fdata.cpp
+++ b/bolt/tools/merge-fdata/merge-fdata.cpp
@@ -316,11 +316,15 @@ void mergeLegacyProfiles(const 
SmallVectorImpl &Filenames) {
 do {
   StringRef Line(FdataLine);
   CounterTy Count;
+  unsigned Type = 0;
+  if (Line.split(' ').first.getAsInteger(10, Type))
+report_error(Filename, "Malformed / corrupted entry type");
+  bool IsBranchEntry = Type < 3;
   auto [Signature, ExecCount] = Line.rsplit(' ');
   if (ExecCount.getAsInteger(10, Count.Exec))
 report_error(Filename, "Malformed / corrupted execution count");
-  // Only LBR profile has misprediction field
-  if (!NoLBRCollection.value_or(false)) {
+  // Only LBR profile has misprediction field, branch entries
+  if (!NoLBRCollection.value_or(false) && IsBranchEntry) {
 auto [SignatureLBR, MispredCount] = Signature.rsplit(' ');
 Signature = SignatureLBR;
 if (MispredCount.getAsInteger(10, Count.Mispred))
@@ -356,7 +360,10 @@ void mergeLegacyProfiles(const 
SmallVectorImpl &Filenames) {
 output() << "no_lbr\n";
   for (const auto &[Key, Value] : MergedProfile) {
 output() << Key << " ";
-if (!NoLBRCollection.value_or(false))
+unsigned Type = 0;
+Key.split(' ').first.getAsInteger(10, Type);
+bool IsBranchEntry = Type < 3;
+if (!NoLBRCollection.value_or(false) && IsBranchEntry)
   output() << Value.Mispred << " ";
 output() << Value.Exec << "\n";
   }

``




https://github.com/llvm/llvm-project/pull/128108
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT] Fix merge-fdata for memory events (PR #128108)

2025-02-20 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov created 
https://github.com/llvm/llvm-project/pull/128108

Don't attempt to parse mispredictions for memory entries in LBR profile.

Test Plan: added merge-fdata-mem-prof.test



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901) (PR #128001)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/128001
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901) (PR #128001)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/128001

Backport 70ed381b1693697dec3efcaed161d3626d16cff1

Requested by: @davemgreen

>From 5b8fc308d40801f326ca5a2e8ca243450aca5263 Mon Sep 17 00:00:00 2001
From: David Green 
Date: Thu, 20 Feb 2025 12:22:11 +
Subject: [PATCH] [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901)

The SDAG version uses fminnum/fmaxnum, in converting it to fcmp+select
it appears the order of the operands was chosen badly. This switches the
conditions used to keep the constant on the RHS.

(cherry picked from commit 70ed381b1693697dec3efcaed161d3626d16cff1)
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp|   4 +-
 .../test/CodeGen/AArch64/fptosi-sat-scalar.ll |  12 +-
 .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 144 +-
 .../test/CodeGen/AArch64/fptoui-sat-scalar.ll |  12 +-
 .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 144 +-
 5 files changed, 158 insertions(+), 158 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp 
b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d0a62340a5f32..536c193d52080 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7801,13 +7801,13 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
   if (AreExactFloatBounds) {
 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
-auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
+auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
  SrcTy.changeElementSize(1), Src, MaxC);
 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
 // Clamp by MaxFloat from above. NaN cannot occur.
 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
 auto MinP =
-MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), 
Max,
+MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), 
Max,
  MinC, MachineInstr::FmNoNans);
 auto Min =
 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll 
b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index bfb5c67801e6c..39e2db3a52d2c 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -987,25 +987,25 @@ define i32 @test_signed_f128_i32(fp128 %f) {
 ; CHECK-GI-NEXT:adrp x8, .LCPI30_1
 ; CHECK-GI-NEXT:str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI30_1]
-; CHECK-GI-NEXT:bl __getf2
+; CHECK-GI-NEXT:bl __gttf2
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:cmp w0, #0
 ; CHECK-GI-NEXT:mov x9, #-4603241769126068224 // =0xc01e
 ; CHECK-GI-NEXT:fmov x8, d0
-; CHECK-GI-NEXT:csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:mov x8, v0.d[1]
 ; CHECK-GI-NEXT:mov v0.d[0], x19
-; CHECK-GI-NEXT:csel x20, x8, x9, lt
+; CHECK-GI-NEXT:csel x20, x8, x9, gt
 ; CHECK-GI-NEXT:adrp x8, .LCPI30_0
 ; CHECK-GI-NEXT:mov v0.d[1], x20
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT:bl __gttf2
+; CHECK-GI-NEXT:bl __lttf2
 ; CHECK-GI-NEXT:cmp w0, #0
-; CHECK-GI-NEXT:csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:mov v0.d[0], x8
 ; CHECK-GI-NEXT:mov x8, #281474976448512 // =0xfffc
 ; CHECK-GI-NEXT:movk x8, #16413, lsl #48
-; CHECK-GI-NEXT:csel x8, x20, x8, gt
+; CHECK-GI-NEXT:csel x8, x20, x8, lt
 ; CHECK-GI-NEXT:mov v0.d[1], x8
 ; CHECK-GI-NEXT:bl __fixtfsi
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll 
b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index b2b3430f4d85e..67d625dd16473 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -535,25 +535,25 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> 
%f) {
 ; CHECK-GI-NEXT:adrp x8, .LCPI14_1
 ; CHECK-GI-NEXT:str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI14_1]
-; CHECK-GI-NEXT:bl __getf2
+; CHECK-GI-NEXT:bl __gttf2
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:cmp w0, #0
 ; CHECK-GI-NEXT:mov x9, #-4603241769126068224 // =0xc01e
 ; CHECK-GI-NEXT:fmov x8, d0
-; CHECK-GI-NEXT:csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:mov x8, v0.d[1]
 ; CHECK-GI-NEXT:mov v0.d[0], x19
-; CHECK-GI-NEXT:csel x20, x8, x9, lt
+; CHECK-GI-NEXT:csel x20, x8, x9, gt
 ; CHECK-GI-NEXT:adrp x8, .LCPI14_0
 ; CHECK-GI-NEXT:mov v0.d[1], x20
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.L

[llvm-branch-commits] [llvm] release/20.x: [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901) (PR #128001)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:

@nikic What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/128001
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-02-20 Thread Sergio Afonso via llvm-branch-commits


@@ -24,7 +25,82 @@ namespace flangomp {
 
 namespace {
 namespace looputils {
-using LoopNest = llvm::SetVector;
+/// Stores info needed about the induction/iteration variable for each `do
+/// concurrent` in a loop nest. This includes only for now:
+/// * the operation allocating memory for iteration variable,

skatrak wrote:

Nit: Instead of listing what the structure includes here, move each field's 
description above its declaration.

https://github.com/llvm/llvm-project/pull/127633
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++] Reduce the dependency of the locale base API on the base system from the headers (#117764) (PR #128009)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/128009

Backport bcfd9f81e1bc9954d616ffbb8625099916bebd5b 
f00b32e2d0ee666d32f1ddd0c687e269fab95b44

Requested by: @ldionne

>From 7ddd9cfecb603668fecb480459521400934b4d96 Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Wed, 5 Feb 2025 08:33:14 -0500
Subject: [PATCH 1/2] [libc++] Fix stray usage of
 _LIBCPP_HAS_NO_WIDE_CHARACTERS on Windows

(cherry picked from commit bcfd9f81e1bc9954d616ffbb8625099916bebd5b)
---
 libcxx/include/__locale_dir/support/windows.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/include/__locale_dir/support/windows.h 
b/libcxx/include/__locale_dir/support/windows.h
index ff89d3e87eb44..f0f76c527264a 100644
--- a/libcxx/include/__locale_dir/support/windows.h
+++ b/libcxx/include/__locale_dir/support/windows.h
@@ -215,7 +215,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, 
const char* __src, s
   return ::_strxfrm_l(__dest, __src, __n, __loc);
 }
 
-#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
+#if _LIBCPP_HAS_WIDE_CHARACTERS
 inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, 
__locale_t __loc) {
   return ::_iswctype_l(__c, __type, __loc);
 }
@@ -240,7 +240,7 @@ inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* 
__ws1, const wchar_t*
 inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* 
__src, size_t __n, __locale_t __loc) {
   return ::_wcsxfrm_l(__dest, __src, __n, __loc);
 }
-#endif // !_LIBCPP_HAS_NO_WIDE_CHARACTERS
+#endif // _LIBCPP_HAS_WIDE_CHARACTERS
 
 #if defined(__MINGW32__) && __MSVCRT_VERSION__ < 0x0800
 _LIBCPP_EXPORTED_FROM_ABI size_t __strftime(char*, size_t, const char*, const 
struct tm*, __locale_t);

>From 249fbcd61acf2b807726c6a3eb0be6e46940d8ee Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Thu, 20 Feb 2025 08:38:42 -0500
Subject: [PATCH 2/2] [libc++] Reduce the dependency of the locale base API on
 the base system from the headers (#117764)

Many parts of the locale base API are only required when building the
shared/static library, but not from the headers. Document those
functions and carve out a few of those that don't work when
_XOPEN_SOURCE is defined to something old.

Fixes #117630

(cherry picked from commit f00b32e2d0ee666d32f1ddd0c687e269fab95b44)
---
 libcxx/include/__locale_dir/locale_base_api.h | 56 ---
 .../include/__locale_dir/support/bsd_like.h   | 22 +---
 libcxx/include/__locale_dir/support/fuchsia.h |  9 ++-
 .../support/no_locale/characters.h|  8 ++-
 libcxx/include/__locale_dir/support/windows.h | 18 --
 libcxx/test/libcxx/xopen_source.gen.py| 53 ++
 6 files changed, 128 insertions(+), 38 deletions(-)
 create mode 100644 libcxx/test/libcxx/xopen_source.gen.py

diff --git a/libcxx/include/__locale_dir/locale_base_api.h 
b/libcxx/include/__locale_dir/locale_base_api.h
index bbee9f49867fd..c1e73caeecced 100644
--- a/libcxx/include/__locale_dir/locale_base_api.h
+++ b/libcxx/include/__locale_dir/locale_base_api.h
@@ -23,12 +23,16 @@
 // Variadic functions may be implemented as templates with a parameter pack 
instead
 // of C-style variadic functions.
 //
+// Most of these functions are only required when building the library. 
Functions that are also
+// required when merely using the headers are marked as such below.
+//
 // TODO: __localeconv shouldn't take a reference, but the Windows 
implementation doesn't allow copying __locale_t
+// TODO: Eliminate the need for any of these functions from the headers.
 //
 // Locale management
 // -
 // namespace __locale {
-//  using __locale_t = implementation-defined;
+//  using __locale_t = implementation-defined;  // required by the headers
 //  using __lconv_t  = implementation-defined;
 //  __locale_t  __newlocale(int, const char*, __locale_t);
 //  void__freelocale(__locale_t);
@@ -36,6 +40,7 @@
 //  __lconv_t*  __localeconv(__locale_t&);
 // }
 //
+// // required by the headers
 // #define _LIBCPP_COLLATE_MASK   /* implementation-defined */
 // #define _LIBCPP_CTYPE_MASK /* implementation-defined */
 // #define _LIBCPP_MONETARY_MASK  /* implementation-defined */
@@ -48,6 +53,7 @@
 // Strtonum functions
 // --
 // namespace __locale {
+//  // required by the headers
 //  float   __strtof(const char*, char**, __locale_t);
 //  double  __strtod(const char*, char**, __locale_t);
 //  long double __strtold(const char*, char**, __locale_t);
@@ -60,8 +66,8 @@
 // namespace __locale {
 //  int __islower(int, __locale_t);
 //  int __isupper(int, __locale_t);
-//  int __isdigit(int, __locale_t);
-//  int __isxdigit(int, __locale_t);
+//  int __isdigit(int, __locale_t);  // required by the headers
+//  int __isxdigit(int, __locale_t); // required by the headers
 //  int __toupper(int, __locale_t);
 //  int __tolower(int, __locale_t);
 //  int __strcol

[llvm-branch-commits] [libcxx] release/20.x: [libc++] Reduce the dependency of the locale base API on the base system from the headers (#117764) (PR #128009)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/128009
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++] Reduce the dependency of the locale base API on the base system from the headers (#117764) (PR #128009)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-libcxx

Author: None (llvmbot)


Changes

Backport bcfd9f81e1bc9954d616ffbb8625099916bebd5b 
f00b32e2d0ee666d32f1ddd0c687e269fab95b44

Requested by: @ldionne

---

Patch is 21.42 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/128009.diff


6 Files Affected:

- (modified) libcxx/include/__locale_dir/locale_base_api.h (+36-20) 
- (modified) libcxx/include/__locale_dir/support/bsd_like.h (+14-8) 
- (modified) libcxx/include/__locale_dir/support/fuchsia.h (+6-3) 
- (modified) libcxx/include/__locale_dir/support/no_locale/characters.h (+6-2) 
- (modified) libcxx/include/__locale_dir/support/windows.h (+13-5) 
- (added) libcxx/test/libcxx/xopen_source.gen.py (+53) 


``diff
diff --git a/libcxx/include/__locale_dir/locale_base_api.h 
b/libcxx/include/__locale_dir/locale_base_api.h
index bbee9f49867fd..c1e73caeecced 100644
--- a/libcxx/include/__locale_dir/locale_base_api.h
+++ b/libcxx/include/__locale_dir/locale_base_api.h
@@ -23,12 +23,16 @@
 // Variadic functions may be implemented as templates with a parameter pack 
instead
 // of C-style variadic functions.
 //
+// Most of these functions are only required when building the library. 
Functions that are also
+// required when merely using the headers are marked as such below.
+//
 // TODO: __localeconv shouldn't take a reference, but the Windows 
implementation doesn't allow copying __locale_t
+// TODO: Eliminate the need for any of these functions from the headers.
 //
 // Locale management
 // -
 // namespace __locale {
-//  using __locale_t = implementation-defined;
+//  using __locale_t = implementation-defined;  // required by the headers
 //  using __lconv_t  = implementation-defined;
 //  __locale_t  __newlocale(int, const char*, __locale_t);
 //  void__freelocale(__locale_t);
@@ -36,6 +40,7 @@
 //  __lconv_t*  __localeconv(__locale_t&);
 // }
 //
+// // required by the headers
 // #define _LIBCPP_COLLATE_MASK   /* implementation-defined */
 // #define _LIBCPP_CTYPE_MASK /* implementation-defined */
 // #define _LIBCPP_MONETARY_MASK  /* implementation-defined */
@@ -48,6 +53,7 @@
 // Strtonum functions
 // --
 // namespace __locale {
+//  // required by the headers
 //  float   __strtof(const char*, char**, __locale_t);
 //  double  __strtod(const char*, char**, __locale_t);
 //  long double __strtold(const char*, char**, __locale_t);
@@ -60,8 +66,8 @@
 // namespace __locale {
 //  int __islower(int, __locale_t);
 //  int __isupper(int, __locale_t);
-//  int __isdigit(int, __locale_t);
-//  int __isxdigit(int, __locale_t);
+//  int __isdigit(int, __locale_t);  // required by the headers
+//  int __isxdigit(int, __locale_t); // required by the headers
 //  int __toupper(int, __locale_t);
 //  int __tolower(int, __locale_t);
 //  int __strcoll(const char*, const char*, __locale_t);
@@ -99,9 +105,10 @@
 //  int __mbtowc(wchar_t*, const char*, size_t, __locale_t);
 //  size_t  __mbrlen(const char*, size_t, mbstate_t*, __locale_t);
 //  size_t  __mbsrtowcs(wchar_t*, const char**, size_t, mbstate_t*, 
__locale_t);
-//  int __snprintf(char*, size_t, __locale_t, const char*, ...);
-//  int __asprintf(char**, __locale_t, const char*, ...);
-//  int __sscanf(const char*, __locale_t, const char*, ...);
+//
+//  int __snprintf(char*, size_t, __locale_t, const char*, ...); // 
required by the headers
+//  int __asprintf(char**, __locale_t, const char*, ...);// 
required by the headers
+//  int __sscanf(const char*, __locale_t, const char*, ...); // 
required by the headers
 // }
 
 #if defined(__APPLE__)
@@ -143,8 +150,19 @@ namespace __locale {
 //
 // Locale management
 //
+#  define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK
+#  define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK
+#  define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK
+#  define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK
+#  define _LIBCPP_TIME_MASK LC_TIME_MASK
+#  define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK
+#  define _LIBCPP_ALL_MASK LC_ALL_MASK
+#  define _LIBCPP_LC_ALL LC_ALL
+
 using __locale_t _LIBCPP_NODEBUG = locale_t;
-using __lconv_t _LIBCPP_NODEBUG  = lconv;
+
+#  if defined(_LIBCPP_BUILDING_LIBRARY)
+using __lconv_t _LIBCPP_NODEBUG = lconv;
 
 inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const 
char* __name, __locale_t __loc) {
   return newlocale(__category_mask, __name, __loc);
@@ -157,15 +175,7 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int 
__category, char const* __loc
 inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { 
freelocale(__loc); }
 
 inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { 
return __libcpp_localeconv_l(__loc); }
-
-#  define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK
-#  define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK
-#  define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK
-#  define _LIBC

[llvm-branch-commits] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-02-20 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy updated 
https://github.com/llvm/llvm-project/pull/127633

>From a615d777295ac1cf5fc1c8bd2eca9f5d6283b409 Mon Sep 17 00:00:00 2001
From: ergawy 
Date: Tue, 18 Feb 2025 02:50:46 -0600
Subject: [PATCH] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP
 host constructs

Upstreams one more part of the ROCm `do concurrent` to OpenMP mapping
pass. This PR adds support for converting simple loops to the equivalent
OpenMP constructs on the host: `omp parallel do`. Towards that end, we
have to collect more information about loop nests, for which we add new
utils in the `looputils` namespace.
---
 flang/docs/DoConcurrentConversionToOpenMP.md  |  47 
 .../OpenMP/DoConcurrentConversion.cpp | 211 +-
 .../Transforms/DoConcurrent/basic_host.f90|  14 +-
 .../Transforms/DoConcurrent/basic_host.mlir   |  62 +
 .../DoConcurrent/non_const_bounds.f90 |  45 
 .../DoConcurrent/not_perfectly_nested.f90 |  45 
 6 files changed, 405 insertions(+), 19 deletions(-)
 create mode 100644 flang/test/Transforms/DoConcurrent/basic_host.mlir
 create mode 100644 flang/test/Transforms/DoConcurrent/non_const_bounds.f90
 create mode 100644 flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90

diff --git a/flang/docs/DoConcurrentConversionToOpenMP.md 
b/flang/docs/DoConcurrentConversionToOpenMP.md
index ec2e11684b33c..24ed35ca00403 100644
--- a/flang/docs/DoConcurrentConversionToOpenMP.md
+++ b/flang/docs/DoConcurrentConversionToOpenMP.md
@@ -126,6 +126,53 @@ see the "Data environment" section below.
 See `flang/test/Transforms/DoConcurrent/loop_nest_test.f90` for more examples
 of what is and is not detected as a perfect loop nest.
 
+### Single-range loops
+
+Given the following loop:
+```fortran
+  do concurrent(i=1:n)
+a(i) = i * i
+  end do
+```
+
+ Mapping to `host`
+
+Mapping this loop to the `host`, generates MLIR operations of the following
+structure:
+
+```
+%4 = fir.address_of(@_QFEa) ...
+%6:2 = hlfir.declare %4 ...
+
+omp.parallel {
+  // Allocate private copy for `i`.
+  // TODO Use delayed privatization.
+  %19 = fir.alloca i32 {bindc_name = "i"}
+  %20:2 = hlfir.declare %19 {uniq_name = "_QFEi"} ...
+
+  omp.wsloop {
+omp.loop_nest (%arg0) : index = (%21) to (%22) inclusive step (%c1_2) {
+  %23 = fir.convert %arg0 : (index) -> i32
+  // Use the privatized version of `i`.
+  fir.store %23 to %20#1 : !fir.ref
+  ...
+
+  // Use "shared" SSA value of `a`.
+  %42 = hlfir.designate %6#0
+  hlfir.assign %35 to %42
+  ...
+  omp.yield
+}
+omp.terminator
+  }
+  omp.terminator
+}
+```
+
+ Mapping to `device`
+
+
+
 

[llvm-branch-commits] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-02-20 Thread Kareem Ergawy via llvm-branch-commits


@@ -93,11 +169,14 @@ bool isPerfectlyNested(fir::DoLoopOp outerLoop, 
fir::DoLoopOp innerLoop) {
 /// recognize a certain nested loop as part of the nest it just returns the
 /// parent loops it discovered before.
 mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
-LoopNest &loopNest) {
+LoopNestToIndVarMap &loopNest) {
   assert(currentLoop.getUnordered());
 
   while (true) {
-loopNest.insert(currentLoop);
+loopNest.try_emplace(

ergawy wrote:

Strangely enough, there is no `emplace`. There is `insert` though, which might 
look less weird. Used it.

https://github.com/llvm/llvm-project/pull/127633
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Extend `do concurrent` mapping to multi-range loops (PR #127634)

2025-02-20 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy updated 
https://github.com/llvm/llvm-project/pull/127634

>From 40d14156ff5d48ce94de96d8d23119a35b728dab Mon Sep 17 00:00:00 2001
From: ergawy 
Date: Tue, 18 Feb 2025 06:17:17 -0600
Subject: [PATCH] [flang][OpenMP] Extend `do concurrent` mapping to multi-range
 loops

Adds support for converting multi-range loops to OpenMP (on the host
only for now). The changes here "prepare" a loop nest for collapsing by
sinking iteration variables to the innermost `fir.do_loop` op in the
nest.
---
 flang/docs/DoConcurrentConversionToOpenMP.md  |  29 
 .../OpenMP/DoConcurrentConversion.cpp | 139 +-
 .../multiple_iteration_ranges.f90 |  72 +
 3 files changed, 239 insertions(+), 1 deletion(-)
 create mode 100644 
flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90

diff --git a/flang/docs/DoConcurrentConversionToOpenMP.md 
b/flang/docs/DoConcurrentConversionToOpenMP.md
index 24ed35ca00403..c9bfd769e3033 100644
--- a/flang/docs/DoConcurrentConversionToOpenMP.md
+++ b/flang/docs/DoConcurrentConversionToOpenMP.md
@@ -173,6 +173,35 @@ omp.parallel {
 
 
 
+### Multi-range loops
+
+The pass currently supports multi-range loops as well. Given the following
+example:
+
+```fortran
+   do concurrent(i=1:n, j=1:m)
+   a(i,j) = i * j
+   end do
+```
+
+The generated `omp.loop_nest` operation look like:
+
+```
+omp.loop_nest (%arg0, %arg1)
+: index = (%17, %19) to (%18, %20)
+inclusive step (%c1_2, %c1_4) {
+  fir.store %arg0 to %private_i#1 : !fir.ref
+  fir.store %arg1 to %private_j#1 : !fir.ref
+  ...
+  omp.yield
+}
+```
+
+It is worth noting that we have privatized versions for both iteration
+variables: `i` and `j`. These are locally allocated inside the parallel/target
+OpenMP region similar to what the single-range example in previous section
+shows.
+
 

[llvm-branch-commits] [clang] HIP: Use builtin_nan instead of manual expansion (PR #128023)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/128023

I'm guessing the only reason the __make_mantissa* functions
exist was to support this, so maybe these can be deleted now.

This is broken in the non-constant string case, since it ends
up emitting a call to the libm function

>From 88b441975bc452c5b19d30b4d534fbce0b16dc0b Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 20 Nov 2022 08:51:42 -0800
Subject: [PATCH] HIP: Use builtin_nan instead of manual expansion

I'm guessing the only reason the __make_mantissa* functions
exist was to support this, so maybe these can be deleted now.

This is broken in the non-constant string case, since it ends
up emitting a call to the libm function
---
 clang/lib/Headers/__clang_hip_math.h | 45 ++--
 1 file changed, 3 insertions(+), 42 deletions(-)

diff --git a/clang/lib/Headers/__clang_hip_math.h 
b/clang/lib/Headers/__clang_hip_math.h
index 79cb7906852c4..8c21f5d882181 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -519,24 +519,8 @@ float modff(float __x, float *__iptr) {
 }
 
 __DEVICE__
-float nanf(const char *__tagp __attribute__((nonnull))) {
-  union {
-float val;
-struct ieee_float {
-  unsigned int mantissa : 22;
-  unsigned int quiet : 1;
-  unsigned int exponent : 8;
-  unsigned int sign : 1;
-} bits;
-  } __tmp;
-  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
-
-  __tmp.bits.sign = 0u;
-  __tmp.bits.exponent = ~0u;
-  __tmp.bits.quiet = 1u;
-  __tmp.bits.mantissa = __make_mantissa(__tagp);
-
-  return __tmp.val;
+float nanf(const char *__tagp) {
+  return __builtin_nanf(__tagp);
 }
 
 __DEVICE__
@@ -1072,30 +1056,7 @@ double modf(double __x, double *__iptr) {
 
 __DEVICE__
 double nan(const char *__tagp) {
-#if !_WIN32
-  union {
-double val;
-struct ieee_double {
-  uint64_t mantissa : 51;
-  uint32_t quiet : 1;
-  uint32_t exponent : 11;
-  uint32_t sign : 1;
-} bits;
-  } __tmp;
-  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
-
-  __tmp.bits.sign = 0u;
-  __tmp.bits.exponent = ~0u;
-  __tmp.bits.quiet = 1u;
-  __tmp.bits.mantissa = __make_mantissa(__tagp);
-
-  return __tmp.val;
-#else
-  __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
-  uint64_t __val = __make_mantissa(__tagp);
-  __val |= 0xFFF << 51;
-  return *reinterpret_cast(&__val);
-#endif
+  return __builtin_nan(__tagp);
 }
 
 __DEVICE__

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] HIP: Use builtin_nan instead of manual expansion (PR #128023)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/128023
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] HIP: Use builtin_nan instead of manual expansion (PR #128023)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Matt Arsenault (arsenm)


Changes

I'm guessing the only reason the __make_mantissa* functions
exist was to support this, so maybe these can be deleted now.

This is broken in the non-constant string case, since it ends
up emitting a call to the libm function

---
Full diff: https://github.com/llvm/llvm-project/pull/128023.diff


1 Files Affected:

- (modified) clang/lib/Headers/__clang_hip_math.h (+3-42) 


``diff
diff --git a/clang/lib/Headers/__clang_hip_math.h 
b/clang/lib/Headers/__clang_hip_math.h
index 79cb7906852c4..8c21f5d882181 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -519,24 +519,8 @@ float modff(float __x, float *__iptr) {
 }
 
 __DEVICE__
-float nanf(const char *__tagp __attribute__((nonnull))) {
-  union {
-float val;
-struct ieee_float {
-  unsigned int mantissa : 22;
-  unsigned int quiet : 1;
-  unsigned int exponent : 8;
-  unsigned int sign : 1;
-} bits;
-  } __tmp;
-  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
-
-  __tmp.bits.sign = 0u;
-  __tmp.bits.exponent = ~0u;
-  __tmp.bits.quiet = 1u;
-  __tmp.bits.mantissa = __make_mantissa(__tagp);
-
-  return __tmp.val;
+float nanf(const char *__tagp) {
+  return __builtin_nanf(__tagp);
 }
 
 __DEVICE__
@@ -1072,30 +1056,7 @@ double modf(double __x, double *__iptr) {
 
 __DEVICE__
 double nan(const char *__tagp) {
-#if !_WIN32
-  union {
-double val;
-struct ieee_double {
-  uint64_t mantissa : 51;
-  uint32_t quiet : 1;
-  uint32_t exponent : 11;
-  uint32_t sign : 1;
-} bits;
-  } __tmp;
-  __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
-
-  __tmp.bits.sign = 0u;
-  __tmp.bits.exponent = ~0u;
-  __tmp.bits.quiet = 1u;
-  __tmp.bits.mantissa = __make_mantissa(__tagp);
-
-  return __tmp.val;
-#else
-  __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
-  uint64_t __val = __make_mantissa(__tagp);
-  __val |= 0xFFF << 51;
-  return *reinterpret_cast(&__val);
-#endif
+  return __builtin_nan(__tagp);
 }
 
 __DEVICE__

``




https://github.com/llvm/llvm-project/pull/128023
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] HIP: Use builtin_nan instead of manual expansion (PR #128023)

2025-02-20 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack on Graphite:
> https://app.graphite.dev/github/pr/llvm/llvm-project/128023?utm_source=stack-comment-downstack-mergeability-warning
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#128023** https://app.graphite.dev/github/pr/llvm/llvm-project/128023?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/128023?utm_source=stack-comment-view-in-graphite";
 target="_blank">(View in Graphite)
* **#128022** https://app.graphite.dev/github/pr/llvm/llvm-project/128022?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`




This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn 
more about https://stacking.dev/?utm_source=stack-comment";>stacking.


https://github.com/llvm/llvm-project/pull/128023
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Handle "loop-local values" in `do concurrent` nests (PR #127635)

2025-02-20 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy updated 
https://github.com/llvm/llvm-project/pull/127635

>From 2d9fb8f381a413c4451f812efc15636d5ab15825 Mon Sep 17 00:00:00 2001
From: ergawy 
Date: Tue, 18 Feb 2025 06:40:19 -0600
Subject: [PATCH] [flang][OpenMP] Handle "loop-local values" in `do concurrent`
 nests

Extends `do concurrent` mapping to handle "loop-local values". A loop-local
value is one that is used exclusively inside the loop but allocated outside
of it. This usually corresponds to temporary values that are used inside the
loop body for initializing other variables, for example. After collecting these
values, the pass localizes them to the loop nest by moving their allocations.
---
 flang/docs/DoConcurrentConversionToOpenMP.md  | 51 ++
 .../OpenMP/DoConcurrentConversion.cpp | 68 ++-
 .../DoConcurrent/locally_destroyed_temp.f90   | 62 +
 3 files changed, 180 insertions(+), 1 deletion(-)
 create mode 100644 
flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90

diff --git a/flang/docs/DoConcurrentConversionToOpenMP.md 
b/flang/docs/DoConcurrentConversionToOpenMP.md
index c9bfd769e3033..5505da17950e4 100644
--- a/flang/docs/DoConcurrentConversionToOpenMP.md
+++ b/flang/docs/DoConcurrentConversionToOpenMP.md
@@ -202,6 +202,57 @@ variables: `i` and `j`. These are locally allocated inside 
the parallel/target
 OpenMP region similar to what the single-range example in previous section
 shows.
 
+### Data environment
+
+By default, variables that are used inside a `do concurrent` loop nest are
+either treated as `shared` in case of mapping to `host`, or mapped into the
+`target` region using a `map` clause in case of mapping to `device`. The only
+exceptions to this are:
+  1. the loop's iteration variable(s) (IV) of **perfect** loop nests. In that
+ case, for each IV, we allocate a local copy as shown by the mapping
+ examples above.
+  1. any values that are from allocations outside the loop nest and used
+ exclusively inside of it. In such cases, a local privatized
+ copy is created in the OpenMP region to prevent multiple teams of threads
+ from accessing and destroying the same memory block, which causes runtime
+ issues. For an example of such cases, see
+ `flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90`.
+
+Implicit mapping detection (for mapping to the target device) is still quite
+limited and work to make it smarter is underway for both OpenMP in general 
+and `do concurrent` mapping.
+
+#### Non-perfectly-nested loops' IVs
+
+For non-perfectly-nested loops, the IVs are still treated as `shared` or
+`map` entries as pointed out above. This **might not** be consistent with what
+the Fortran specification tells us. In particular, taking the following
+snippets from the spec (version 2023) into account:
+
+> § 3.35
+> --
+> construct entity
+> entity whose identifier has the scope of a construct
+
+> § 19.4
+> --
+>  A variable that appears as an index-name in a FORALL or DO CONCURRENT
+>  construct [...] is a construct entity. A variable that has LOCAL or
+>  LOCAL_INIT locality in a DO CONCURRENT construct is a construct entity.
+> [...]
+> The name of a variable that appears as an index-name in a DO CONCURRENT
+> construct, FORALL statement, or FORALL construct has a scope of the statement
+> or construct. A variable that has LOCAL or LOCAL_INIT locality in a DO
+> CONCURRENT construct has the scope of that construct.
+
+From the above quotes, it seems there is an equivalence between the IV of a `do
+concurrent` loop and a variable with a `LOCAL` locality specifier (equivalent
+to OpenMP's `private` clause). Which means that we should probably
+localize/privatize a `do concurrent` loop's IV even if it is not perfectly
+nested in the nest we are parallelizing. For now, however, we **do not** do
+that as pointed out previously. In the near future, we propose a middle-ground
+solution (see the Next steps section for more details).
+
 

[llvm-branch-commits] [clang] HIP: Use builtin_nan instead of manual expansion (PR #128023)

2025-02-20 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff c6c75b5d7a9a1869bfcdc413a98b00fe4b9f07d1 88b441975bc452c5b19d30b4d534fbce0b16dc0b --extensions h -- clang/lib/Headers/__clang_hip_math.h
```





View the diff from clang-format here.


``diff
diff --git a/clang/lib/Headers/__clang_hip_math.h 
b/clang/lib/Headers/__clang_hip_math.h
index 8c21f5d882..1f62a06b50 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -519,9 +519,7 @@ float modff(float __x, float *__iptr) {
 }
 
 __DEVICE__
-float nanf(const char *__tagp) {
-  return __builtin_nanf(__tagp);
-}
+float nanf(const char *__tagp) { return __builtin_nanf(__tagp); }
 
 __DEVICE__
 float nearbyintf(float __x) { return __builtin_nearbyintf(__x); }
@@ -1055,9 +1053,7 @@ double modf(double __x, double *__iptr) {
 }
 
 __DEVICE__
-double nan(const char *__tagp) {
-  return __builtin_nan(__tagp);
-}
+double nan(const char *__tagp) { return __builtin_nan(__tagp); }
 
 __DEVICE__
 double nearbyint(double __x) { return __builtin_nearbyint(__x); }

``




https://github.com/llvm/llvm-project/pull/128023
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [HLSL] Implement default constant buffer `$Globals` (PR #125807)

2025-02-20 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner approved this pull request.


https://github.com/llvm/llvm-project/pull/125807
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)

2025-02-20 Thread Paschalis Mpeis via llvm-branch-commits

https://github.com/paschalis-mpeis edited 
https://github.com/llvm/llvm-project/pull/116964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)

2025-02-20 Thread Paschalis Mpeis via llvm-branch-commits

paschalis-mpeis wrote:

- force-push to stack this PR on top of #127812.
- add code & test to ensure that we skip pending relocations only when 
`-force-patch` was set

https://github.com/llvm/llvm-project/pull/116964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering (non i1) (PR #124298)

2025-02-20 Thread Petar Avramovic via llvm-branch-commits


@@ -188,6 +190,35 @@ void 
DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) {
   In.Reg = Copy.getReg(0);
 }
 
+void replaceUsesOfRegInInstWith(Register Reg, MachineInstr *Inst,
+Register NewReg) {
+  for (MachineOperand &Op : Inst->operands()) {
+if (Op.isReg() && Op.getReg() == Reg)
+  Op.setReg(NewReg);
+  }
+}
+
+bool DivergenceLoweringHelper::lowerTemporalDivergence() {
+  AMDGPU::IntrinsicLaneMaskAnalyzer ILMA(*MF);
+
+  for (auto [Inst, UseInst, _] : MUI->getTemporalDivergenceList()) {
+Register Reg = Inst->getOperand(0).getReg();
+if (MRI->getType(Reg) == LLT::scalar(1) || MUI->isDivergent(Reg) ||
+ILMA.isS32S64LaneMask(Reg))
+  continue;
+
+MachineBasicBlock *MBB = Inst->getParent();
+B.setInsertPt(*MBB, 
MBB->SkipPHIsAndLabels(std::next(Inst->getIterator(;
+
+Register VgprReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));

petar-avramovic wrote:

It unnecessarily complicates the new RegBankSelect; regbankselect will set vgpr
there anyway. Also, the copy has an implicit exec, which should be special
enough to indicate what we are doing.

https://github.com/llvm/llvm-project/pull/124298
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Skip out-of-range pending relocations (PR #116964)

2025-02-20 Thread Paschalis Mpeis via llvm-branch-commits

https://github.com/paschalis-mpeis edited 
https://github.com/llvm/llvm-project/pull/116964
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering (non i1) (PR #124298)

2025-02-20 Thread Petar Avramovic via llvm-branch-commits


@@ -188,6 +190,35 @@ void 
DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) {
   In.Reg = Copy.getReg(0);
 }
 
+void replaceUsesOfRegInInstWith(Register Reg, MachineInstr *Inst,
+Register NewReg) {
+  for (MachineOperand &Op : Inst->operands()) {
+if (Op.isReg() && Op.getReg() == Reg)
+  Op.setReg(NewReg);
+  }
+}
+
+bool DivergenceLoweringHelper::lowerTemporalDivergence() {
+  AMDGPU::IntrinsicLaneMaskAnalyzer ILMA(*MF);
+
+  for (auto [Inst, UseInst, _] : MUI->getTemporalDivergenceList()) {

petar-avramovic wrote:

Yes, true; it should map Register instead of Inst.

https://github.com/llvm/llvm-project/pull/124298
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering (non i1) (PR #124298)

2025-02-20 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/124298

>From 538c0b43558d091886b844c8a1603e83d7aaf864 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Fri, 31 Jan 2025 13:04:17 +0100
Subject: [PATCH] AMDGPU/GlobalISel: Temporal divergence lowering (non i1)

Record all uses outside cycle with divergent exit during
propagateTemporalDivergence in Uniformity analysis.
With this list of candidates for temporal divergence lowering,
excluding known lane masks from control flow intrinsics,
find sources from inside the cycle that are not i1 and uniform.
Temporal divergence lowering (non i1):
create copy(v_mov) to vgpr, with implicit exec (to stop other
passes from moving this copy outside of the cycle) and use this
vgpr outside of the cycle instead of original uniform source.
---
 llvm/include/llvm/ADT/GenericUniformityImpl.h | 33 ++
 llvm/include/llvm/ADT/GenericUniformityInfo.h |  5 +++
 llvm/lib/Analysis/UniformityAnalysis.cpp  |  3 +-
 .../lib/CodeGen/MachineUniformityAnalysis.cpp |  6 +--
 .../AMDGPUGlobalISelDivergenceLowering.cpp| 45 ++-
 .../lib/Target/AMDGPU/AMDGPURegBankSelect.cpp | 25 +--
 llvm/lib/Target/AMDGPU/SILowerI1Copies.h  |  6 +++
 ...divergent-i1-phis-no-lane-mask-merging.mir |  7 +--
 ...ergence-divergent-i1-used-outside-loop.mir | 19 
 .../divergence-temporal-divergent-reg.ll  | 18 
 .../divergence-temporal-divergent-reg.mir |  3 +-
 .../AMDGPU/GlobalISel/regbankselect-mui.ll| 17 +++
 12 files changed, 146 insertions(+), 41 deletions(-)

diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h 
b/llvm/include/llvm/ADT/GenericUniformityImpl.h
index bd09f4fe43e08..d0f7bd1412065 100644
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -342,6 +342,9 @@ template  class 
GenericUniformityAnalysisImpl {
   typename SyncDependenceAnalysisT::DivergenceDescriptor;
   using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
 
+  using TemporalDivergenceTuple =
+  std::tuple;
+
   GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI,
 const TargetTransformInfo *TTI)
   : Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
@@ -396,6 +399,11 @@ template  class 
GenericUniformityAnalysisImpl {
 
   void print(raw_ostream &out) const;
 
+  SmallVector TemporalDivergenceList;
+
+  void recordTemporalDivergence(const InstructionT *, const InstructionT *,
+const CycleT *);
+
 protected:
   /// \brief Value/block pair representing a single phi input.
   struct PhiInput {
@@ -1129,6 +1137,13 @@ void GenericUniformityAnalysisImpl::compute() {
   }
 }
 
+template 
+void GenericUniformityAnalysisImpl::recordTemporalDivergence(
+const InstructionT *Inst, const InstructionT *User, const CycleT *Cycle) {
+  TemporalDivergenceList.emplace_back(const_cast(Inst),
+  const_cast(User), Cycle);
+}
+
 template 
 bool GenericUniformityAnalysisImpl::isAlwaysUniform(
 const InstructionT &Instr) const {
@@ -1180,6 +1195,16 @@ void 
GenericUniformityAnalysisImpl::print(raw_ostream &OS) const {
 }
   }
 
+  if (!TemporalDivergenceList.empty()) {
+OS << "\nTEMPORAL DIVERGENCE LIST:\n";
+
+for (auto [Inst, UseInst, Cycle] : TemporalDivergenceList) {
+  OS << "Inst:" << Context.print(Inst)
+ << "Used by :" << Context.print(UseInst)
+ << "Outside cycle :" << Cycle->print(Context) << "\n\n";
+}
+  }
+
   for (auto &block : F) {
 OS << "\nBLOCK " << Context.print(&block) << '\n';
 
@@ -1210,6 +1235,14 @@ void 
GenericUniformityAnalysisImpl::print(raw_ostream &OS) const {
   }
 }
 
+template 
+iterator_range<
+typename GenericUniformityInfo::TemporalDivergenceTuple *>
+GenericUniformityInfo::getTemporalDivergenceList() const {
+  return make_range(DA->TemporalDivergenceList.begin(),
+DA->TemporalDivergenceList.end());
+}
+
 template 
 bool GenericUniformityInfo::hasDivergence() const {
   return DA->hasDivergence();
diff --git a/llvm/include/llvm/ADT/GenericUniformityInfo.h 
b/llvm/include/llvm/ADT/GenericUniformityInfo.h
index e53afccc020b4..8d3b141aaeded 100644
--- a/llvm/include/llvm/ADT/GenericUniformityInfo.h
+++ b/llvm/include/llvm/ADT/GenericUniformityInfo.h
@@ -40,6 +40,9 @@ template  class GenericUniformityInfo {
   using CycleInfoT = GenericCycleInfo;
   using CycleT = typename CycleInfoT::CycleT;
 
+  using TemporalDivergenceTuple =
+  std::tuple;
+
   GenericUniformityInfo(const DominatorTreeT &DT, const CycleInfoT &CI,
 const TargetTransformInfo *TTI = nullptr);
   GenericUniformityInfo() = default;
@@ -78,6 +81,8 @@ template  class GenericUniformityInfo {
 
   void print(raw_ostream &Out) const;
 
+  iterator_range getTemporalDivergenceList() const;
+
 private:
   using ImplT = Gene

[llvm-branch-commits] [llvm] release/20.x: [LLVM][AArch64] Remove aliases of LSUI instructions (#126072) (PR #127084)

2025-02-20 Thread via llvm-branch-commits

https://github.com/CarolineConcatto approved this pull request.

LGTM!

https://github.com/llvm/llvm-project/pull/127084
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [HLSL] Implement default constant buffer `$Globals` (PR #125807)

2025-02-20 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota updated 
https://github.com/llvm/llvm-project/pull/125807

>From 42bb34f66f0030f55e1055c4ee0b362511b7f45b Mon Sep 17 00:00:00 2001
From: Helena Kotas 
Date: Tue, 4 Feb 2025 22:01:49 -0800
Subject: [PATCH 1/5] [HLSL] Implement default constant buffer `$Globals`

All variable declarations in the global scope that are not resources, static or 
empty are implicitly added to implicit constant buffer `$Globals`.

Fixes #123801
---
 clang/include/clang/AST/Decl.h  | 22 +++
 clang/include/clang/Sema/SemaHLSL.h |  7 ++-
 clang/lib/AST/Decl.cpp  | 41 -
 clang/lib/CodeGen/CGHLSLRuntime.cpp |  7 +--
 clang/lib/CodeGen/CodeGenModule.cpp |  5 ++
 clang/lib/Sema/Sema.cpp |  3 +-
 clang/lib/Sema/SemaHLSL.cpp | 47 +--
 clang/test/AST/HLSL/default_cbuffer.hlsl| 50 
 clang/test/CodeGenHLSL/basic_types.hlsl | 64 ++---
 clang/test/CodeGenHLSL/default_cbuffer.hlsl | 43 ++
 10 files changed, 242 insertions(+), 47 deletions(-)
 create mode 100644 clang/test/AST/HLSL/default_cbuffer.hlsl
 create mode 100644 clang/test/CodeGenHLSL/default_cbuffer.hlsl

diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 05e56978977f2..f86ddaf89bd9c 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -5038,6 +5038,11 @@ class HLSLBufferDecl final : public NamedDecl, public 
DeclContext {
   // LayoutStruct - Layout struct for the buffer
   CXXRecordDecl *LayoutStruct;
 
+  // For default (implicit) constant buffer, a list of references of global
+  // decls that belong to the buffer. The decls are already parented by the
+  // translation unit context.
+  SmallVector DefaultBufferDecls;
+
   HLSLBufferDecl(DeclContext *DC, bool CBuffer, SourceLocation KwLoc,
  IdentifierInfo *ID, SourceLocation IDLoc,
  SourceLocation LBrace);
@@ -5047,6 +5052,8 @@ class HLSLBufferDecl final : public NamedDecl, public 
DeclContext {
 bool CBuffer, SourceLocation KwLoc,
 IdentifierInfo *ID, SourceLocation IDLoc,
 SourceLocation LBrace);
+  static HLSLBufferDecl *CreateDefaultCBuffer(ASTContext &C,
+  DeclContext *LexicalParent);
   static HLSLBufferDecl *CreateDeserialized(ASTContext &C, GlobalDeclID ID);
 
   SourceRange getSourceRange() const override LLVM_READONLY {
@@ -5061,6 +5068,7 @@ class HLSLBufferDecl final : public NamedDecl, public 
DeclContext {
   bool hasPackoffset() const { return HasPackoffset; }
   const CXXRecordDecl *getLayoutStruct() const { return LayoutStruct; }
   void addLayoutStruct(CXXRecordDecl *LS);
+  void addDefaultBufferDecl(Decl *D);
 
   // Implement isa/cast/dyncast/etc.
   static bool classof(const Decl *D) { return classofKind(D->getKind()); }
@@ -5072,6 +5080,20 @@ class HLSLBufferDecl final : public NamedDecl, public 
DeclContext {
 return static_cast(const_cast(DC));
   }
 
+  // Iterator for the buffer decls. Concatenates the list of decls parented
+  // by this HLSLBufferDecl with the list of default buffer decls.
+  using buffer_decl_iterator =
+  llvm::concat_iterator::const_iterator,
+decl_iterator>;
+  using buffer_decl_range = llvm::iterator_range;
+
+  buffer_decl_range buffer_decls() const {
+return buffer_decl_range(buffer_decls_begin(), buffer_decls_end());
+  }
+  buffer_decl_iterator buffer_decls_begin() const;
+  buffer_decl_iterator buffer_decls_end() const;
+  bool buffer_decls_empty();
+
   friend class ASTDeclReader;
   friend class ASTDeclWriter;
 };
diff --git a/clang/include/clang/Sema/SemaHLSL.h 
b/clang/include/clang/Sema/SemaHLSL.h
index f4cd11f423a84..b1cc856975532 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -103,13 +103,13 @@ class SemaHLSL : public SemaBase {
  HLSLParamModifierAttr::Spelling Spelling);
   void ActOnTopLevelFunction(FunctionDecl *FD);
   void ActOnVariableDeclarator(VarDecl *VD);
+  void ActOnEndOfTranslationUnit(TranslationUnitDecl *TU);
   void CheckEntryPoint(FunctionDecl *FD);
   void CheckSemanticAnnotation(FunctionDecl *EntryPoint, const Decl *Param,
const HLSLAnnotationAttr *AnnotationAttr);
   void DiagnoseAttrStageMismatch(
   const Attr *A, llvm::Triple::EnvironmentType Stage,
   std::initializer_list AllowedStages);
-  void DiagnoseAvailabilityViolations(TranslationUnitDecl *TU);
 
   QualType handleVectorBinOpConversion(ExprResult &LHS, ExprResult &RHS,
QualType LHSType, QualType RHSType,
@@ -159,11 +159,16 @@ class SemaHLSL : public SemaBase {
   // List of all resource bindings
   ResourceBindings Bindings;
 
+  // default constant buffer $Globals
+  HLSL

[llvm-branch-commits] [compiler-rt] 624bd5c - Revert "[Clang] [NFC] Fix more `-Wreturn-type` warnings in tests everywhere (…"

2025-02-20 Thread via llvm-branch-commits

Author: Sirraide
Date: 2025-02-20T20:02:05+01:00
New Revision: 624bd5c40f81a9ece34f65dfdc639621daeaaac9

URL: 
https://github.com/llvm/llvm-project/commit/624bd5c40f81a9ece34f65dfdc639621daeaaac9
DIFF: 
https://github.com/llvm/llvm-project/commit/624bd5c40f81a9ece34f65dfdc639621daeaaac9.diff

LOG: Revert "[Clang] [NFC] Fix more `-Wreturn-type` warnings in tests 
everywhere (…"

This reverts commit b0210fee94bc29a507f900da1fb97f0e50ab2637.

Added: 


Modified: 
clang-tools-extra/clangd/unittests/ASTTests.cpp
clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp
clang-tools-extra/clangd/unittests/ParsedASTTests.cpp
clang-tools-extra/clangd/unittests/QualityTests.cpp
clang-tools-extra/clangd/unittests/RenameTests.cpp
clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp
clang-tools-extra/clangd/unittests/SymbolInfoTests.cpp
clang-tools-extra/clangd/unittests/XRefsTests.cpp
clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp
clang-tools-extra/clangd/unittests/tweaks/ExpandDeducedTypeTests.cpp
clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp

clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/strings/internal-file.h
clang-tools-extra/test/clang-tidy/checkers/boost/use-to-string.cpp

clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-coro.cpp

clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape-rethrow.cpp
clang-tools-extra/test/clang-tidy/checkers/bugprone/exception-escape.cpp
clang-tools-extra/test/clang-tidy/checkers/bugprone/fold-init-type.cpp

clang-tools-extra/test/clang-tidy/checkers/bugprone/inc-dec-in-conditions-bitint-no-crash.c

clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.c

clang-tools-extra/test/clang-tidy/checkers/bugprone/spuriously-wake-up-functions.cpp
clang-tools-extra/test/clang-tidy/checkers/bugprone/stringview-nullptr.cpp

clang-tools-extra/test/clang-tidy/checkers/bugprone/suspicious-string-compare.cpp

clang-tools-extra/test/clang-tidy/checkers/fuchsia/default-arguments-calls.cpp
clang-tools-extra/test/clang-tidy/checkers/fuchsia/multiple-inheritance.cpp
clang-tools-extra/test/clang-tidy/checkers/google/runtime-int-std.cpp

clang-tools-extra/test/clang-tidy/checkers/google/upgrade-googletest-case.cpp

clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-transform-values.cpp
clang-tools-extra/test/clang-tidy/checkers/misc/const-correctness-values.cpp
clang-tools-extra/test/clang-tidy/checkers/misc/unused-parameters.cpp

clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp

clang-tools-extra/test/clang-tidy/checkers/modernize/Inputs/use-auto/containers.h
clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-bind.cpp

clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-c++20.cpp

clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-main.cpp

clang-tools-extra/test/clang-tidy/checkers/modernize/avoid-c-arrays-ignores-three-arg-main.cpp
clang-tools-extra/test/clang-tidy/checkers/modernize/loop-convert-basic.cpp
clang-tools-extra/test/clang-tidy/checkers/modernize/use-emplace.cpp

clang-tools-extra/test/clang-tidy/checkers/modernize/use-equals-default-copy.cpp
clang-tools-extra/test/clang-tidy/checkers/modernize/use-override.cpp
clang-tools-extra/test/clang-tidy/checkers/modernize/use-std-format.cpp

clang-tools-extra/test/clang-tidy/checkers/modernize/use-trailing-return-type.cpp

clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header-fixed.h

clang-tools-extra/test/clang-tidy/checkers/performance/Inputs/unnecessary-value-param/header.h

clang-tools-extra/test/clang-tidy/checkers/performance/inefficient-string-concatenation.cpp

clang-tools-extra/test/clang-tidy/checkers/performance/unnecessary-value-param-header.cpp

clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style1/header.h

clang-tools-extra/test/clang-tidy/checkers/readability/Inputs/identifier-naming/global-style2/header.h

clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type-macros.cpp
clang-tools-extra/test/clang-tidy/checkers/readability/const-return-type.cpp

clang-tools-extra/test/clang-tidy/checkers/readability/convert-member-functions-to-static.cpp
clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp

clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.cpp
clang-tools-extra/test/clang-tidy/checkers/readability/named-parameter.cpp

clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.c

clang-tools-extra/test/clang-tidy/checkers/readability/redundant-declaration.cpp
   

[llvm-branch-commits] [clang] HIP: Use builtin_nan instead of manual expansion (PR #128023)

2025-02-20 Thread via llvm-branch-commits

b-sumner wrote:

> why do we want to do this if it is broken for non-literal string?

Right.  It doesn't seem likely that it won't be literal, but no documentation 
I've found requires it to be.

https://github.com/llvm/llvm-project/pull/128023
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [HLSL] Add support for root constant generation from llvm IR. (PR #127932)

2025-02-20 Thread via llvm-branch-commits

https://github.com/joaosaffran updated 
https://github.com/llvm/llvm-project/pull/127932

>From 86a52d82e858c24a0f756f583a1b3d8dac3087d8 Mon Sep 17 00:00:00 2001
From: joaosaffran 
Date: Wed, 19 Feb 2025 22:53:55 +
Subject: [PATCH 1/4] parsing root constant

---
 llvm/lib/Target/DirectX/DXILRootSignature.cpp | 32 +++
 1 file changed, 32 insertions(+)

diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp 
b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index 8702f0eecf2aa..6f7e3418782a3 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -40,6 +40,38 @@ static bool reportError(LLVMContext *Ctx, Twine Message,
   return true;
 }
 
+static bool parseRootConstants(LLVMContext *Ctx, mcdxbc::RootSignatureDesc 
&RSD,
+   MDNode *RootConstNode) {
+  if (RootConstNode->getNumOperands() != 5)
+return reportError(Ctx, "Invalid format for RootFlag Element");
+
+  dxbc::RootParameter NewParam;
+  NewParam.ParameterType = dxbc::RootParameterType::Constants32Bit;
+
+  auto *ShaderVisibility =
+  mdconst::extract(RootConstNode->getOperand(1));
+  dxbc::ShaderVisibilityFlag SvFlag =
+  (dxbc::ShaderVisibilityFlag)ShaderVisibility->getZExtValue();
+  if (!dxbc::RootSignatureValidations::isValidShaderVisibility(SvFlag))
+return reportError(
+Ctx, "Invalid shader visibility flag value in root constant.");
+  NewParam.ShaderVisibility = SvFlag;
+
+  auto *ShaderRegister =
+  mdconst::extract(RootConstNode->getOperand(2));
+  NewParam.Constants.ShaderRegister = ShaderRegister->getZExtValue();
+
+  auto *RegisterSpace =
+  mdconst::extract(RootConstNode->getOperand(3));
+  NewParam.Constants.RegisterSpace = RegisterSpace->getZExtValue();
+
+  auto *Num32BitValues =
+  mdconst::extract(RootConstNode->getOperand(4));
+  NewParam.Constants.Num32BitValues = Num32BitValues->getZExtValue();
+
+  return false;
+}
+
 static bool parseRootFlags(LLVMContext *Ctx, mcdxbc::RootSignatureDesc &RSD,
MDNode *RootFlagNode) {
 

>From f181f4dbae982c948e81aff41bcb84f59048fbed Mon Sep 17 00:00:00 2001
From: joaosaffran 
Date: Thu, 20 Feb 2025 00:51:23 +
Subject: [PATCH 2/4] add root constant support

---
 llvm/lib/Target/DirectX/DXILRootSignature.cpp | 36 +--
 llvm/lib/Target/DirectX/DXILRootSignature.h   |  7 +++-
 ...nature-Constants-Error-invalid-metadata.ll | 17 +
 ...nstants-Error-invalid-shader-visibility.ll | 17 +
 .../ContainerData/RootSignature-Constants.ll  | 33 +
 5 files changed, 107 insertions(+), 3 deletions(-)
 create mode 100644 
llvm/test/CodeGen/DirectX/ContainerData/RootSignature-Constants-Error-invalid-metadata.ll
 create mode 100644 
llvm/test/CodeGen/DirectX/ContainerData/RootSignature-Constants-Error-invalid-shader-visibility.ll
 create mode 100644 
llvm/test/CodeGen/DirectX/ContainerData/RootSignature-Constants.ll

diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp 
b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index 6f7e3418782a3..388a3f5e8af0b 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -43,7 +43,7 @@ static bool reportError(LLVMContext *Ctx, Twine Message,
 static bool parseRootConstants(LLVMContext *Ctx, mcdxbc::RootSignatureDesc 
&RSD,
MDNode *RootConstNode) {
   if (RootConstNode->getNumOperands() != 5)
-return reportError(Ctx, "Invalid format for RootFlag Element");
+return reportError(Ctx, "Invalid format for Root constants element");
 
   dxbc::RootParameter NewParam;
   NewParam.ParameterType = dxbc::RootParameterType::Constants32Bit;
@@ -69,6 +69,8 @@ static bool parseRootConstants(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   mdconst::extract(RootConstNode->getOperand(4));
   NewParam.Constants.Num32BitValues = Num32BitValues->getZExtValue();
 
+  RSD.Parameters.push_back(NewParam);
+
   return false;
 }
 
@@ -94,10 +96,12 @@ static bool parseRootSignatureElement(LLVMContext *Ctx,
   RootSignatureElementKind ElementKind =
   StringSwitch(ElementText->getString())
   .Case("RootFlags", RootSignatureElementKind::RootFlags)
+  .Case("RootConstants", RootSignatureElementKind::RootConstants)
   .Default(RootSignatureElementKind::Error);
 
   switch (ElementKind) {
-
+  case RootSignatureElementKind::RootConstants:
+return parseRootConstants(Ctx, RSD, Element);
   case RootSignatureElementKind::RootFlags:
 return parseRootFlags(Ctx, RSD, Element);
   case RootSignatureElementKind::Error:
@@ -241,6 +245,34 @@ PreservedAnalyses RootSignatureAnalysisPrinter::run(Module 
&M,
 OS << indent(Space) << "NumStaticSamplers: " << 0 << ":\n";
 OS << indent(Space) << "StaticSamplersOffset: "
<< sizeof(RS.Header) + RS.Parameters.size_in_bytes() << ":\n";
+
+OS << indent(Space) << "- Parameters: \n";
+Space++;
+f

[llvm-branch-commits] [llvm] 239faf0 - [LLVM][AArch64] Remove aliases of LSUI instructions (#126072)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

Author: Kerry McLaughlin
Date: 2025-02-20T12:49:54-08:00
New Revision: 239faf0b9dbcf092337a8feb696f1f9bf0671241

URL: 
https://github.com/llvm/llvm-project/commit/239faf0b9dbcf092337a8feb696f1f9bf0671241
DIFF: 
https://github.com/llvm/llvm-project/commit/239faf0b9dbcf092337a8feb696f1f9bf0671241.diff

LOG: [LLVM][AArch64] Remove aliases of LSUI instructions (#126072)

Removes MnemonicAliases added for instructions available with
the LSUI feature (e.g. CAS -> CAST) which are not equivalent.
The aliases stt[add|clr|set]a & stt[add|clr|set]al are also removed.

(cherry picked from commit d44d806faa879dfb7a7ceb58beeb57cf8d5af430)

Added: 


Modified: 
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/MC/AArch64/armv8.1a-lse.s
llvm/test/MC/AArch64/armv9.6a-lsui.s

Removed: 




diff  --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3bb5d3cb4d09d..c2eea836fb14f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12529,26 +12529,19 @@ multiclass STOPregister {
 !cast(instr # "X")>;
 }
 
+let Predicates = [HasLSUI] in
 class BaseSTOPregisterLSUI :
-  InstAlias;
+  InstAlias;
 
 multiclass STOPregisterLSUI {
-  def : BaseSTOPregisterLSUI(instr # "W")>;
-  def : BaseSTOPregisterLSUI(instr # "X")>;
-  def : BaseSTOPregisterLSUI(instr # "W")>;
-  def : BaseSTOPregisterLSUI(instr # "X")>;
-  def : BaseSTOPregisterLSUI(instr # "W")>;
-  def : BaseSTOPregisterLSUI(instr # "X")>;
-  def : BaseSTOPregisterLSUI(instr # "LW")>;
+  def : BaseSTOPregisterLSUI(instr # "LX")>;
+  def : BaseSTOPregisterLSUI(instr # "W")>;
-  def : BaseSTOPregisterLSUI(instr # "X")>;
 }
 

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b77246200db64..a3a607825c7f6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2667,21 +2667,11 @@ defm CASLT  : CompareAndSwapUnprivileged<0b11, 0, 1, 
"l">;
 defm CASAT  : CompareAndSwapUnprivileged<0b11, 1, 0, "a">;
 defm CASALT : CompareAndSwapUnprivileged<0b11, 1, 1, "al">;
 
-def : MnemonicAlias<"cas", "cast">;
-def : MnemonicAlias<"casl", "caslt">;
-def : MnemonicAlias<"casa", "casat">;
-def : MnemonicAlias<"casal", "casalt">;
-
 // v9.6-a atomic CASPT
 defm CASPT   : CompareAndSwapPairUnprivileged<0b01, 0, 0, "">;
 defm CASPLT  : CompareAndSwapPairUnprivileged<0b01, 0, 1, "l">;
 defm CASPAT  : CompareAndSwapPairUnprivileged<0b01, 1, 0, "a">;
 defm CASPALT : CompareAndSwapPairUnprivileged<0b01, 1, 1, "al">;
-
-def : MnemonicAlias<"casp", "caspt">;
-def : MnemonicAlias<"caspl", "casplt">;
-def : MnemonicAlias<"caspa", "caspat">;
-def : MnemonicAlias<"caspal", "caspalt">;
 }
 
 // v8.1 atomic SWP
@@ -2696,11 +2686,6 @@ let Predicates = [HasLSUI] in {
   defm SWPTA  : SwapLSUI<1, 0, "a">;
   defm SWPTL  : SwapLSUI<0, 1, "l">;
   defm SWPTAL : SwapLSUI<1, 1, "al">;
-
-  def : MnemonicAlias<"swp", "swpt">;
-  def : MnemonicAlias<"swpa", "swpta">;
-  def : MnemonicAlias<"swpl", "swptl">;
-  def : MnemonicAlias<"swpal", "swptal">;
 }
 
 // v9.6-a unprivileged atomic LD (FEAT_LSUI)
@@ -4865,22 +4850,14 @@ let Predicates = [HasLSUI] in {
 defm LDTXRW : LoadUnprivilegedLSUI<0b10, GPR32, "ldtxr">;
 defm LDTXRX : LoadUnprivilegedLSUI<0b11, GPR64, "ldtxr">;
 
-def : MnemonicAlias<"ldxr", "ldtxr">;
-
 def LDATXRW : LoadExclusiveLSUI <0b10, 1, 1, GPR32, "ldatxr">;
 def LDATXRX : LoadExclusiveLSUI <0b11, 1, 1, GPR64, "ldatxr">;
 
-def : MnemonicAlias<"ldaxr", "ldatxr">;
-
 defm STTXRW : StoreUnprivilegedLSUI<0b10, GPR32, "sttxr">;
 defm STTXRX : StoreUnprivilegedLSUI<0b11, GPR64, "sttxr">;
 
-def : MnemonicAlias<"stxr", "sttxr">;
-
 def STLTXRW : StoreExclusiveLSUI<0b10, 0, 1, GPR32, "stltxr">;
 def STLTXRX : StoreExclusiveLSUI<0b11, 0, 1, GPR64, "stltxr">;
-
-def : MnemonicAlias<"stlxr", "stltxr">;
 }
 
 
//===--===//

diff  --git a/llvm/test/MC/AArch64/armv8.1a-lse.s 
b/llvm/test/MC/AArch64/armv8.1a-lse.s
index b5bbbe66c6ae2..eb9a30dea9e6b 100644
--- a/llvm/test/MC/AArch64/armv8.1a-lse.s
+++ b/llvm/test/MC/AArch64/armv8.1a-lse.s
@@ -7,6 +7,8 @@
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -mcpu=tsv110 -show-encoding 
< %s 2> %t | FileCheck %s
 // RUN: FileCheck -check-prefix=CHECK-ERROR < %t %s
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8r -show-encoding 
< %s 2> %t | FileCheck %s
+// RUN: FileCheck -check-prefix=CHECK-ERROR < %t %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a,+lse,+lsui 
-show-encoding < %s 2> %t | FileCheck %s
 // RUN: FileCheck -check-prefix=CHECK-ERROR < %t %s
   .text
 

diff  --git a/llvm/test/MC/AArch64/armv9.6a-lsui.s 
b/llvm/test/MC/AArch64/armv9.6a-lsui.s
in

[llvm-branch-commits] [llvm] release/20.x: [LLVM][AArch64] Remove aliases of LSUI instructions (#126072) (PR #127084)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/127084
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [LLVM][AArch64] Remove aliases of LSUI instructions (#126072) (PR #127084)

2025-02-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@kmclaughlin-arm (or anyone else): if you would like to add a note about this 
fix in the release notes (completely optional), please reply to this comment 
with a one- or two-sentence description of the fix.  When you are done, please 
add the release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/127084
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++][TZDB] Fixes mapping of nonexisting time. (#127330) (PR #127531)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/127531

>From e7e2c16cac97696a89a053f72b94112739b6897f Mon Sep 17 00:00:00 2001
From: Mark de Wever 
Date: Mon, 17 Feb 2025 19:08:07 +0100
Subject: [PATCH] [libc++][TZDB] Fixes mapping of nonexisting time. (#127330)

All non-existing local times in a contiguous range should map to the
same time point. This fixes a bug where the times inside the range were
mapped to the wrong time.

Fixes: #113654
(cherry picked from commit 941f7cbf5a3e7aa9f36b002dc22cfdb4ff50fea8)
---
 libcxx/include/__chrono/time_zone.h |  8 ++--
 .../time.zone.members/to_sys_choose.pass.cpp| 17 +++--
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/libcxx/include/__chrono/time_zone.h 
b/libcxx/include/__chrono/time_zone.h
index ab5c22eceaaf1..d18d59d2736bf 100644
--- a/libcxx/include/__chrono/time_zone.h
+++ b/libcxx/include/__chrono/time_zone.h
@@ -103,10 +103,14 @@ class _LIBCPP_AVAILABILITY_TZDB time_zone {
   to_sys(const local_time<_Duration>& __time, choose __z) const {
 local_info __info = get_info(__time);
 switch (__info.result) {
-case local_info::unique:
-case local_info::nonexistent: // first and second are the same
+case local_info::unique: // first and second are the same
   return sys_time>{__time.time_since_epoch() - __info.first.offset};
 
+case local_info::nonexistent:
+  // first and second are the same
+  // All non-existing values are converted to the same time.
+  return sys_time>{__info.first.end};
+
 case local_info::ambiguous:
   switch (__z) {
   case choose::earliest:
diff --git 
a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp
 
b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp
index bad4ef352e9b9..1147c9fadf9ae 100644
--- 
a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp
+++ 
b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp
@@ -88,7 +88,7 @@ static void test_nonexistent() {
   // Pick an historic date where it's well known what the time zone rules were.
   // This makes it unlikely updates to the database change these rules.
   std::chrono::local_time time{
-  (std::chrono::sys_days{std::chrono::March / 30 / 1986} + 2h + 
30min).time_since_epoch()};
+  (std::chrono::sys_days{std::chrono::March / 30 / 1986} + 
2h).time_since_epoch()};
 
   std::chrono::sys_seconds expected{time.time_since_epoch() - 1h};
 
@@ -100,6 +100,13 @@ static void test_nonexistent() {
   assert(tz->to_sys(time + 0us, std::chrono::choose::latest) == expected);
   assert(tz->to_sys(time + 0ms, std::chrono::choose::earliest) == expected);
   assert(tz->to_sys(time + 0s, std::chrono::choose::latest) == expected);
+
+  // The entire nonexisting hour should map to the same time.
+  // For nonexistant the value of std::chrono::choose has no effect.
+  assert(tz->to_sys(time + 1s, std::chrono::choose::earliest) == expected);
+  assert(tz->to_sys(time + 1min, std::chrono::choose::latest) == expected);
+  assert(tz->to_sys(time + 30min, std::chrono::choose::earliest) == expected);
+  assert(tz->to_sys(time + 59min + 59s, std::chrono::choose::latest) == 
expected);
 }
 
 // Tests ambiguous conversions.
@@ -120,7 +127,7 @@ static void test_ambiguous() {
   // Pick an historic date where it's well known what the time zone rules were.
   // This makes it unlikely updates to the database change these rules.
   std::chrono::local_time time{
-  (std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h + 
30min).time_since_epoch()};
+  (std::chrono::sys_days{std::chrono::September / 28 / 1986} + 
2h).time_since_epoch()};
 
   std::chrono::sys_seconds earlier{time.time_since_epoch() - 2h};
   std::chrono::sys_seconds later{time.time_since_epoch() - 1h};
@@ -133,6 +140,12 @@ static void test_ambiguous() {
   assert(tz->to_sys(time + 0us, std::chrono::choose::latest) == later);
   assert(tz->to_sys(time + 0ms, std::chrono::choose::earliest) == earlier);
   assert(tz->to_sys(time + 0s, std::chrono::choose::latest) == later);
+
+  // Test times in the ambigious hour
+  assert(tz->to_sys(time + 1s, std::chrono::choose::earliest) == earlier + 1s);
+  assert(tz->to_sys(time + 1min, std::chrono::choose::latest) == later + 1min);
+  assert(tz->to_sys(time + 30min, std::chrono::choose::earliest) == earlier + 
30min);
+  assert(tz->to_sys(time + 59min + 59s, std::chrono::choose::latest) == later 
+ 59min + 59s);
 }
 
 // This test does the basic validations of this function. The library function

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++][TZDB] Fixes mapping of nonexisting time. (#127330) (PR #127531)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/127531
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] e7e2c16 - [libc++][TZDB] Fixes mapping of nonexisting time. (#127330)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

Author: Mark de Wever
Date: 2025-02-20T12:56:22-08:00
New Revision: e7e2c16cac97696a89a053f72b94112739b6897f

URL: 
https://github.com/llvm/llvm-project/commit/e7e2c16cac97696a89a053f72b94112739b6897f
DIFF: 
https://github.com/llvm/llvm-project/commit/e7e2c16cac97696a89a053f72b94112739b6897f.diff

LOG: [libc++][TZDB] Fixes mapping of nonexisting time. (#127330)

All non-existing local times in a contiguous range should map to the
same time point. This fixes a bug where the times inside the range were
mapped to the wrong time.

Fixes: #113654
(cherry picked from commit 941f7cbf5a3e7aa9f36b002dc22cfdb4ff50fea8)

Added: 


Modified: 
libcxx/include/__chrono/time_zone.h

libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp

Removed: 




diff  --git a/libcxx/include/__chrono/time_zone.h 
b/libcxx/include/__chrono/time_zone.h
index ab5c22eceaaf1..d18d59d2736bf 100644
--- a/libcxx/include/__chrono/time_zone.h
+++ b/libcxx/include/__chrono/time_zone.h
@@ -103,10 +103,14 @@ class _LIBCPP_AVAILABILITY_TZDB time_zone {
   to_sys(const local_time<_Duration>& __time, choose __z) const {
 local_info __info = get_info(__time);
 switch (__info.result) {
-case local_info::unique:
-case local_info::nonexistent: // first and second are the same
+case local_info::unique: // first and second are the same
   return sys_time>{__time.time_since_epoch() - __info.first.offset};
 
+case local_info::nonexistent:
+  // first and second are the same
+  // All non-existing values are converted to the same time.
+  return sys_time>{__info.first.end};
+
 case local_info::ambiguous:
   switch (__z) {
   case choose::earliest:

diff  --git 
a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp
 
b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp
index bad4ef352e9b9..1147c9fadf9ae 100644
--- 
a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp
+++ 
b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/to_sys_choose.pass.cpp
@@ -88,7 +88,7 @@ static void test_nonexistent() {
   // Pick an historic date where it's well known what the time zone rules were.
   // This makes it unlikely updates to the database change these rules.
   std::chrono::local_time time{
-  (std::chrono::sys_days{std::chrono::March / 30 / 1986} + 2h + 
30min).time_since_epoch()};
+  (std::chrono::sys_days{std::chrono::March / 30 / 1986} + 
2h).time_since_epoch()};
 
   std::chrono::sys_seconds expected{time.time_since_epoch() - 1h};
 
@@ -100,6 +100,13 @@ static void test_nonexistent() {
   assert(tz->to_sys(time + 0us, std::chrono::choose::latest) == expected);
   assert(tz->to_sys(time + 0ms, std::chrono::choose::earliest) == expected);
   assert(tz->to_sys(time + 0s, std::chrono::choose::latest) == expected);
+
+  // The entire nonexisting hour should map to the same time.
+  // For nonexistant the value of std::chrono::choose has no effect.
+  assert(tz->to_sys(time + 1s, std::chrono::choose::earliest) == expected);
+  assert(tz->to_sys(time + 1min, std::chrono::choose::latest) == expected);
+  assert(tz->to_sys(time + 30min, std::chrono::choose::earliest) == expected);
+  assert(tz->to_sys(time + 59min + 59s, std::chrono::choose::latest) == 
expected);
 }
 
 // Tests ambiguous conversions.
@@ -120,7 +127,7 @@ static void test_ambiguous() {
   // Pick an historic date where it's well known what the time zone rules were.
   // This makes it unlikely updates to the database change these rules.
   std::chrono::local_time time{
-  (std::chrono::sys_days{std::chrono::September / 28 / 1986} + 2h + 
30min).time_since_epoch()};
+  (std::chrono::sys_days{std::chrono::September / 28 / 1986} + 
2h).time_since_epoch()};
 
   std::chrono::sys_seconds earlier{time.time_since_epoch() - 2h};
   std::chrono::sys_seconds later{time.time_since_epoch() - 1h};
@@ -133,6 +140,12 @@ static void test_ambiguous() {
   assert(tz->to_sys(time + 0us, std::chrono::choose::latest) == later);
   assert(tz->to_sys(time + 0ms, std::chrono::choose::earliest) == earlier);
   assert(tz->to_sys(time + 0s, std::chrono::choose::latest) == later);
+
+  // Test times in the ambigious hour
+  assert(tz->to_sys(time + 1s, std::chrono::choose::earliest) == earlier + 1s);
+  assert(tz->to_sys(time + 1min, std::chrono::choose::latest) == later + 1min);
+  assert(tz->to_sys(time + 30min, std::chrono::choose::earliest) == earlier + 
30min);
+  assert(tz->to_sys(time + 59min + 59s, std::chrono::choose::latest) == later 
+ 59min + 59s);
 }
 
 // This test does the basic validations of this function. The library function



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailma

[llvm-branch-commits] [libcxx] release/20.x: [libc++][TZDB] Fixes mapping of nonexisting time. (#127330) (PR #127531)

2025-02-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@mordante (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/127531
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: workflows/release-binaries: Disable Flang on x86_64 macOS (#127216) (PR #127951)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/127951

>From 6ea0367372362e50ddaa6ec51a502c1ca19d26d7 Mon Sep 17 00:00:00 2001
From: Tom Stellard 
Date: Wed, 19 Feb 2025 19:16:30 -0800
Subject: [PATCH] workflows/release-binaries: Disable Flang on x86_64 macOS
 (#127216)

The flang build was taking 2-3 hours and causing the entire job to
time out, so we need to disable it.

(cherry picked from commit 3e5ae5777d92b6f8c647c3f6969fbca0f0f769ff)
---
 .github/workflows/release-binaries.yml | 5 +
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/release-binaries.yml 
b/.github/workflows/release-binaries.yml
index 204ee6405382f..fa3dfa5b7d313 100644
--- a/.github/workflows/release-binaries.yml
+++ b/.github/workflows/release-binaries.yml
@@ -138,6 +138,11 @@ jobs:
 arches=arm64
   else
 arches=x86_64
+# Disable Flang builds on macOS x86_64.  The FortranLower library 
takes
+# 2-3 hours to build on macOS, much slower than on Linux.
+# The long build time causes the release build to time out on 
x86_64,
+# so we need to disable flang there.
+target_cmake_flags="$target_cmake_flags 
-DLLVM_RELEASE_ENABLE_PROJECTS='clang;lld;lldb;clang-tools-extra;bolt;polly;mlir'"
   fi
   target_cmake_flags="$target_cmake_flags 
-DBOOTSTRAP_DARWIN_osx_ARCHS=$arches 
-DBOOTSTRAP_DARWIN_osx_BUILTIN_ARCHS=$arches"
 fi

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: workflows/release-binaries: Disable Flang on x86_64 macOS (#127216) (PR #127951)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/127951
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: workflows/release-binaries: Disable Flang on x86_64 macOS (#127216) (PR #127951)

2025-02-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@tstellar (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/127951
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++] Set feature-test macro `__cpp_lib_atomic_float` (#127559) (PR #127732)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/127732

>From 05cce88ab900af4a4e08018e96f55b0c308ab973 Mon Sep 17 00:00:00 2001
From: "A. Jiang" 
Date: Wed, 19 Feb 2025 09:06:51 +0800
Subject: [PATCH] [libc++] Set feature-test macro `__cpp_lib_atomic_float`
 (#127559)

The corresponding feature was implemented in LLVM 18 (by #67799), but
this FTM wasn't added before.

(cherry picked from commit 2207e3e32549306bf563c6987f790cabe8d4ea78)
---
 libcxx/docs/FeatureTestMacroTable.rst |  2 +-
 libcxx/docs/Status/Cxx20Papers.csv|  2 +-
 libcxx/include/version|  2 +-
 .../atomic.version.compile.pass.cpp   | 48 ++-
 .../version.version.compile.pass.cpp  | 48 ++-
 .../generate_feature_test_macro_components.py |  1 -
 6 files changed, 33 insertions(+), 70 deletions(-)

diff --git a/libcxx/docs/FeatureTestMacroTable.rst 
b/libcxx/docs/FeatureTestMacroTable.rst
index ccaa784ccb088..dcf9838edd74b 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -174,7 +174,7 @@ Status
 -- 
-
 ``__cpp_lib_atomic_flag_test`` ``201907L``
 -- 
-
-``__cpp_lib_atomic_float`` *unimplemented*
+``__cpp_lib_atomic_float`` ``201711L``
 -- 
-
 ``__cpp_lib_atomic_lock_free_type_aliases````201907L``
 -- 
-
diff --git a/libcxx/docs/Status/Cxx20Papers.csv 
b/libcxx/docs/Status/Cxx20Papers.csv
index 524c6d0ac8be0..b595da3728841 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -2,7 +2,7 @@
 "`P0463R1 `__","Endian just Endian","2017-07 
(Toronto)","|Complete|","7",""
 "`P0674R1 `__","Extending make_shared to Support 
Arrays","2017-07 (Toronto)","|Complete|","15",""
 "","","","","",""
-"`P0020R6 `__","Floating Point Atomic","2017-11 
(Albuquerque)","|Complete|","18",""
+"`P0020R6 `__","Floating Point Atomic","2017-11 
(Albuquerque)","|Complete|","18","The feature-test macro was not set until LLVM 
20."
 "`P0053R7 `__","C++ Synchronized Buffered 
Ostream","2017-11 (Albuquerque)","|Complete|","18",""
 "`P0202R3 `__","Add constexpr modifiers to 
functions in  and  Headers","2017-11 
(Albuquerque)","|Complete|","12",""
 "`P0415R1 `__","Constexpr for ``std::complex``\ 
","2017-11 (Albuquerque)","|Complete|","16",""
diff --git a/libcxx/include/version b/libcxx/include/version
index c5966b90c061d..63ead9fd5d29d 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -378,7 +378,7 @@ __cpp_lib_void_t
201411L 
 # define __cpp_lib_array_constexpr  201811L
 # define __cpp_lib_assume_aligned   201811L
 # define __cpp_lib_atomic_flag_test 201907L
-// # define __cpp_lib_atomic_float 201711L
+# define __cpp_lib_atomic_float 201711L
 # define __cpp_lib_atomic_lock_free_type_aliases201907L
 # define __cpp_lib_atomic_ref   201806L
 // # define __cpp_lib_atomic_shared_ptr201711L
diff --git 
a/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp
 
b/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp
index 9ed18fbfe19ac..5a21e6320bffe 100644
--- 
a/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp
+++ 
b/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp
@@ -169,17 +169,11 @@
 #   error "__cpp_lib_atomic_flag_test should have the value 201907L in c++20"
 # endif
 
-# if !defined(_LIBCPP_VERSION)
-#   ifndef __cpp_lib_atomic_float
-# error "__cpp_lib_atomic_float should be defined in c++20"
-#   endif
-#   if __cpp_lib_atomic_float != 201711L
-# error "__cpp_lib_atomic_float should have the value 201711L in c++20"
-#   endif
-# else // _LIBCPP_VERSION
-#   ifdef __cpp_lib_atomic_float
-# error "__cpp_lib_atomic_float should not be defined because it is 
unimplemented in libc++!"
-#   endif
+# ifndef __cpp_lib_atomic_float
+#   error "__cpp_lib_atomic_float should be defined in c++20"
+# endif
+# if __cpp_lib_atomic_float != 201711L
+#   error "__cpp_lib_atomic_float should have the value 201711L in c++20"
 # endif
 
 # ifndef __cpp_lib_atom

[llvm-branch-commits] [libcxx] 05cce88 - [libc++] Set feature-test macro `__cpp_lib_atomic_float` (#127559)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

Author: A. Jiang
Date: 2025-02-20T13:05:05-08:00
New Revision: 05cce88ab900af4a4e08018e96f55b0c308ab973

URL: 
https://github.com/llvm/llvm-project/commit/05cce88ab900af4a4e08018e96f55b0c308ab973
DIFF: 
https://github.com/llvm/llvm-project/commit/05cce88ab900af4a4e08018e96f55b0c308ab973.diff

LOG: [libc++] Set feature-test macro `__cpp_lib_atomic_float` (#127559)

The corresponding feature was implemented in LLVM 18 (by #67799), but
this FTM wasn't added before.

(cherry picked from commit 2207e3e32549306bf563c6987f790cabe8d4ea78)

Added: 


Modified: 
libcxx/docs/FeatureTestMacroTable.rst
libcxx/docs/Status/Cxx20Papers.csv
libcxx/include/version

libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp

libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
libcxx/utils/generate_feature_test_macro_components.py

Removed: 




diff  --git a/libcxx/docs/FeatureTestMacroTable.rst 
b/libcxx/docs/FeatureTestMacroTable.rst
index ccaa784ccb088..dcf9838edd74b 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -174,7 +174,7 @@ Status
 -- 
-
 ``__cpp_lib_atomic_flag_test`` ``201907L``
 -- 
-
-``__cpp_lib_atomic_float`` *unimplemented*
+``__cpp_lib_atomic_float`` ``201711L``
 -- 
-
 ``__cpp_lib_atomic_lock_free_type_aliases````201907L``
 -- 
-

diff  --git a/libcxx/docs/Status/Cxx20Papers.csv 
b/libcxx/docs/Status/Cxx20Papers.csv
index 524c6d0ac8be0..b595da3728841 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -2,7 +2,7 @@
 "`P0463R1 `__","Endian just Endian","2017-07 
(Toronto)","|Complete|","7",""
 "`P0674R1 `__","Extending make_shared to Support 
Arrays","2017-07 (Toronto)","|Complete|","15",""
 "","","","","",""
-"`P0020R6 `__","Floating Point Atomic","2017-11 
(Albuquerque)","|Complete|","18",""
+"`P0020R6 `__","Floating Point Atomic","2017-11 
(Albuquerque)","|Complete|","18","The feature-test macro was not set until LLVM 
20."
 "`P0053R7 `__","C++ Synchronized Buffered 
Ostream","2017-11 (Albuquerque)","|Complete|","18",""
 "`P0202R3 `__","Add constexpr modifiers to 
functions in  and  Headers","2017-11 
(Albuquerque)","|Complete|","12",""
 "`P0415R1 `__","Constexpr for ``std::complex``\ 
","2017-11 (Albuquerque)","|Complete|","16",""

diff  --git a/libcxx/include/version b/libcxx/include/version
index c5966b90c061d..63ead9fd5d29d 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -378,7 +378,7 @@ __cpp_lib_void_t
201411L 
 # define __cpp_lib_array_constexpr  201811L
 # define __cpp_lib_assume_aligned   201811L
 # define __cpp_lib_atomic_flag_test 201907L
-// # define __cpp_lib_atomic_float 201711L
+# define __cpp_lib_atomic_float 201711L
 # define __cpp_lib_atomic_lock_free_type_aliases201907L
 # define __cpp_lib_atomic_ref   201806L
 // # define __cpp_lib_atomic_shared_ptr201711L

diff  --git 
a/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp
 
b/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp
index 9ed18fbfe19ac..5a21e6320bffe 100644
--- 
a/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp
+++ 
b/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp
@@ -169,17 +169,11 @@
 #   error "__cpp_lib_atomic_flag_test should have the value 201907L in c++20"
 # endif
 
-# if !defined(_LIBCPP_VERSION)
-#   ifndef __cpp_lib_atomic_float
-# error "__cpp_lib_atomic_float should be defined in c++20"
-#   endif
-#   if __cpp_lib_atomic_float != 201711L
-# error "__cpp_lib_atomic_float should have the value 201711L in c++20"
-#   endif
-# else // _LIBCPP_VERSION
-#   ifdef __cpp_lib_atomic_float
-# error "__cpp_lib_atomic_float should not be defined because it is 
unimplemented in libc++!"
-#   endif
+# ifndef __cpp_lib_atomic_float
+#   error "__cpp_lib_atomic_float should be defined in c++

[llvm-branch-commits] [libcxx] release/20.x: [libc++] Set feature-test macro `__cpp_lib_atomic_float` (#127559) (PR #127732)

2025-02-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@frederick-vs-ja (or anyone else): if you would like to add a note about this 
fix in the release notes (completely optional), please reply to this comment 
with a one- or two-sentence description of the fix.  When you are done, please 
add the release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/127732
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901) (PR #128001)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/128001

>From 6dcece41472d64555f0ca2539a3e1e1e5feec083 Mon Sep 17 00:00:00 2001
From: David Green 
Date: Thu, 20 Feb 2025 12:22:11 +
Subject: [PATCH] [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901)

The SDAG version uses fminnum/fmaxnum; in converting it to fcmp+select,
it appears the order of the operands was chosen badly. This switches the
conditions used to keep the constant on the RHS.

(cherry picked from commit 70ed381b1693697dec3efcaed161d3626d16cff1)
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp|   4 +-
 .../test/CodeGen/AArch64/fptosi-sat-scalar.ll |  12 +-
 .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 144 +-
 .../test/CodeGen/AArch64/fptoui-sat-scalar.ll |  12 +-
 .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 144 +-
 5 files changed, 158 insertions(+), 158 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp 
b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d0a62340a5f32..536c193d52080 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7801,13 +7801,13 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
   if (AreExactFloatBounds) {
 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
-auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
+auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
  SrcTy.changeElementSize(1), Src, MaxC);
 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
 // Clamp by MaxFloat from above. NaN cannot occur.
 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
 auto MinP =
-MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), 
Max,
+MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), 
Max,
  MinC, MachineInstr::FmNoNans);
 auto Min =
 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll 
b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index bfb5c67801e6c..39e2db3a52d2c 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -987,25 +987,25 @@ define i32 @test_signed_f128_i32(fp128 %f) {
 ; CHECK-GI-NEXT:adrp x8, .LCPI30_1
 ; CHECK-GI-NEXT:str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI30_1]
-; CHECK-GI-NEXT:bl __getf2
+; CHECK-GI-NEXT:bl __gttf2
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:cmp w0, #0
 ; CHECK-GI-NEXT:mov x9, #-4603241769126068224 // =0xc01e
 ; CHECK-GI-NEXT:fmov x8, d0
-; CHECK-GI-NEXT:csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:mov x8, v0.d[1]
 ; CHECK-GI-NEXT:mov v0.d[0], x19
-; CHECK-GI-NEXT:csel x20, x8, x9, lt
+; CHECK-GI-NEXT:csel x20, x8, x9, gt
 ; CHECK-GI-NEXT:adrp x8, .LCPI30_0
 ; CHECK-GI-NEXT:mov v0.d[1], x20
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT:bl __gttf2
+; CHECK-GI-NEXT:bl __lttf2
 ; CHECK-GI-NEXT:cmp w0, #0
-; CHECK-GI-NEXT:csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:mov v0.d[0], x8
 ; CHECK-GI-NEXT:mov x8, #281474976448512 // =0xfffc
 ; CHECK-GI-NEXT:movk x8, #16413, lsl #48
-; CHECK-GI-NEXT:csel x8, x20, x8, gt
+; CHECK-GI-NEXT:csel x8, x20, x8, lt
 ; CHECK-GI-NEXT:mov v0.d[1], x8
 ; CHECK-GI-NEXT:bl __fixtfsi
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll 
b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index b2b3430f4d85e..67d625dd16473 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -535,25 +535,25 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> 
%f) {
 ; CHECK-GI-NEXT:adrp x8, .LCPI14_1
 ; CHECK-GI-NEXT:str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI14_1]
-; CHECK-GI-NEXT:bl __getf2
+; CHECK-GI-NEXT:bl __gttf2
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:cmp w0, #0
 ; CHECK-GI-NEXT:mov x9, #-4603241769126068224 // =0xc01e
 ; CHECK-GI-NEXT:fmov x8, d0
-; CHECK-GI-NEXT:csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:mov x8, v0.d[1]
 ; CHECK-GI-NEXT:mov v0.d[0], x19
-; CHECK-GI-NEXT:csel x20, x8, x9, lt
+; CHECK-GI-NEXT:csel x20, x8, x9, gt
 ; CHECK-GI-NEXT:adrp x8, .LCPI14_0
 ; CHECK-GI-NEXT:mov v0.d[1], x20
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI14_0]
-; CHECK-GI-NEXT:bl __gttf2
+; CHECK-GI-NEXT:bl __lttf2
 ; CH

[llvm-branch-commits] [llvm] release/20.x: [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901) (PR #128001)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/128001
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901) (PR #128001)

2025-02-20 Thread via llvm-branch-commits

github-actions[bot] wrote:

@davemgreen (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/128001
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++] Set feature-test macro `__cpp_lib_atomic_float` (#127559) (PR #127732)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/127732
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [LLVM][AArch64] Remove aliases of LSUI instructions (#126072) (PR #127084)

2025-02-20 Thread via llvm-branch-commits

https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/127084

>From 239faf0b9dbcf092337a8feb696f1f9bf0671241 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin 
Date: Thu, 13 Feb 2025 09:43:16 +
Subject: [PATCH] [LLVM][AArch64] Remove aliases of LSUI instructions (#126072)

Removes MnemonicAliases added for instructions available with
the LSUI feature (e.g. CAS -> CAST) which are not equivalent.
The aliases stt[add|clr|set]a & stt[add|clr|set]al are also removed.

(cherry picked from commit d44d806faa879dfb7a7ceb58beeb57cf8d5af430)
---
 .../lib/Target/AArch64/AArch64InstrFormats.td |  23 ++--
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  23 
 llvm/test/MC/AArch64/armv8.1a-lse.s   |   2 +
 llvm/test/MC/AArch64/armv9.6a-lsui.s  | 102 +++---
 4 files changed, 22 insertions(+), 128 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3bb5d3cb4d09d..c2eea836fb14f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -12529,26 +12529,19 @@ multiclass STOPregister {
 !cast(instr # "X")>;
 }
 
+let Predicates = [HasLSUI] in
 class BaseSTOPregisterLSUI :
-  InstAlias;
+  InstAlias;
 
 multiclass STOPregisterLSUI {
-  def : BaseSTOPregisterLSUI(instr # "W")>;
-  def : BaseSTOPregisterLSUI(instr # "X")>;
-  def : BaseSTOPregisterLSUI(instr # "W")>;
-  def : BaseSTOPregisterLSUI(instr # "X")>;
-  def : BaseSTOPregisterLSUI(instr # "W")>;
-  def : BaseSTOPregisterLSUI(instr # "X")>;
-  def : BaseSTOPregisterLSUI(instr # "LW")>;
+  def : BaseSTOPregisterLSUI(instr # "LX")>;
+  def : BaseSTOPregisterLSUI(instr # "W")>;
-  def : BaseSTOPregisterLSUI(instr # "X")>;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b77246200db64..a3a607825c7f6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2667,21 +2667,11 @@ defm CASLT  : CompareAndSwapUnprivileged<0b11, 0, 1, 
"l">;
 defm CASAT  : CompareAndSwapUnprivileged<0b11, 1, 0, "a">;
 defm CASALT : CompareAndSwapUnprivileged<0b11, 1, 1, "al">;
 
-def : MnemonicAlias<"cas", "cast">;
-def : MnemonicAlias<"casl", "caslt">;
-def : MnemonicAlias<"casa", "casat">;
-def : MnemonicAlias<"casal", "casalt">;
-
 // v9.6-a atomic CASPT
 defm CASPT   : CompareAndSwapPairUnprivileged<0b01, 0, 0, "">;
 defm CASPLT  : CompareAndSwapPairUnprivileged<0b01, 0, 1, "l">;
 defm CASPAT  : CompareAndSwapPairUnprivileged<0b01, 1, 0, "a">;
 defm CASPALT : CompareAndSwapPairUnprivileged<0b01, 1, 1, "al">;
-
-def : MnemonicAlias<"casp", "caspt">;
-def : MnemonicAlias<"caspl", "casplt">;
-def : MnemonicAlias<"caspa", "caspat">;
-def : MnemonicAlias<"caspal", "caspalt">;
 }
 
 // v8.1 atomic SWP
@@ -2696,11 +2686,6 @@ let Predicates = [HasLSUI] in {
   defm SWPTA  : SwapLSUI<1, 0, "a">;
   defm SWPTL  : SwapLSUI<0, 1, "l">;
   defm SWPTAL : SwapLSUI<1, 1, "al">;
-
-  def : MnemonicAlias<"swp", "swpt">;
-  def : MnemonicAlias<"swpa", "swpta">;
-  def : MnemonicAlias<"swpl", "swptl">;
-  def : MnemonicAlias<"swpal", "swptal">;
 }
 
 // v9.6-a unprivileged atomic LD (FEAT_LSUI)
@@ -4865,22 +4850,14 @@ let Predicates = [HasLSUI] in {
 defm LDTXRW : LoadUnprivilegedLSUI<0b10, GPR32, "ldtxr">;
 defm LDTXRX : LoadUnprivilegedLSUI<0b11, GPR64, "ldtxr">;
 
-def : MnemonicAlias<"ldxr", "ldtxr">;
-
 def LDATXRW : LoadExclusiveLSUI <0b10, 1, 1, GPR32, "ldatxr">;
 def LDATXRX : LoadExclusiveLSUI <0b11, 1, 1, GPR64, "ldatxr">;
 
-def : MnemonicAlias<"ldaxr", "ldatxr">;
-
 defm STTXRW : StoreUnprivilegedLSUI<0b10, GPR32, "sttxr">;
 defm STTXRX : StoreUnprivilegedLSUI<0b11, GPR64, "sttxr">;
 
-def : MnemonicAlias<"stxr", "sttxr">;
-
 def STLTXRW : StoreExclusiveLSUI<0b10, 0, 1, GPR32, "stltxr">;
 def STLTXRX : StoreExclusiveLSUI<0b11, 0, 1, GPR64, "stltxr">;
-
-def : MnemonicAlias<"stlxr", "stltxr">;
 }
 
 
//===--===//
diff --git a/llvm/test/MC/AArch64/armv8.1a-lse.s 
b/llvm/test/MC/AArch64/armv8.1a-lse.s
index b5bbbe66c6ae2..eb9a30dea9e6b 100644
--- a/llvm/test/MC/AArch64/armv8.1a-lse.s
+++ b/llvm/test/MC/AArch64/armv8.1a-lse.s
@@ -7,6 +7,8 @@
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -mcpu=tsv110 -show-encoding 
< %s 2> %t | FileCheck %s
 // RUN: FileCheck -check-prefix=CHECK-ERROR < %t %s
 // RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8r -show-encoding 
< %s 2> %t | FileCheck %s
+// RUN: FileCheck -check-prefix=CHECK-ERROR < %t %s
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.1a,+lse,+lsui 
-show-encoding < %s 2> %t | FileCheck %s
 // RUN: FileCheck -check-prefix=CHECK-ERROR < %t %s
   .text
 
diff --git a/llvm/test/MC/AArch64/armv9.6a-lsui.s 
b/llvm/test/MC/AArch64/armv9.6a-lsui.s
index b48db1f9b5570..d4a5e1f980560 100644
--- a/llvm/test/MC/AArch64/armv9.6a-lsui.

[llvm-branch-commits] [llvm] 6dcece4 - [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901)

2025-02-20 Thread Tom Stellard via llvm-branch-commits

Author: David Green
Date: 2025-02-20T13:08:59-08:00
New Revision: 6dcece41472d64555f0ca2539a3e1e1e5feec083

URL: 
https://github.com/llvm/llvm-project/commit/6dcece41472d64555f0ca2539a3e1e1e5feec083
DIFF: 
https://github.com/llvm/llvm-project/commit/6dcece41472d64555f0ca2539a3e1e1e5feec083.diff

LOG: [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901)

The SDAG version uses fminnum/fmaxnum, in converting it to fcmp+select
it appears the order of the operands was chosen badly. This switches the
conditions used to keep the constant on the RHS.

(cherry picked from commit 70ed381b1693697dec3efcaed161d3626d16cff1)

Added: 


Modified: 
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp 
b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d0a62340a5f32..536c193d52080 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7801,13 +7801,13 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
   if (AreExactFloatBounds) {
 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
-auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
+auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
  SrcTy.changeElementSize(1), Src, MaxC);
 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
 // Clamp by MaxFloat from above. NaN cannot occur.
 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
 auto MinP =
-MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), 
Max,
+MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), 
Max,
  MinC, MachineInstr::FmNoNans);
 auto Min =
 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);

diff  --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll 
b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index bfb5c67801e6c..39e2db3a52d2c 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -987,25 +987,25 @@ define i32 @test_signed_f128_i32(fp128 %f) {
 ; CHECK-GI-NEXT:adrp x8, .LCPI30_1
 ; CHECK-GI-NEXT:str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI30_1]
-; CHECK-GI-NEXT:bl __getf2
+; CHECK-GI-NEXT:bl __gttf2
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:cmp w0, #0
 ; CHECK-GI-NEXT:mov x9, #-4603241769126068224 // =0xc01e000000000000
 ; CHECK-GI-NEXT:fmov x8, d0
-; CHECK-GI-NEXT:csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:mov x8, v0.d[1]
 ; CHECK-GI-NEXT:mov v0.d[0], x19
-; CHECK-GI-NEXT:csel x20, x8, x9, lt
+; CHECK-GI-NEXT:csel x20, x8, x9, gt
 ; CHECK-GI-NEXT:adrp x8, .LCPI30_0
 ; CHECK-GI-NEXT:mov v0.d[1], x20
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT:bl __gttf2
+; CHECK-GI-NEXT:bl __lttf2
 ; CHECK-GI-NEXT:cmp w0, #0
-; CHECK-GI-NEXT:csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:mov v0.d[0], x8
 ; CHECK-GI-NEXT:mov x8, #281474976448512 // =0xfffffffc0000
 ; CHECK-GI-NEXT:movk x8, #16413, lsl #48
-; CHECK-GI-NEXT:csel x8, x20, x8, gt
+; CHECK-GI-NEXT:csel x8, x20, x8, lt
 ; CHECK-GI-NEXT:mov v0.d[1], x8
 ; CHECK-GI-NEXT:bl __fixtfsi
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll 
b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index b2b3430f4d85e..67d625dd16473 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -535,25 +535,25 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> 
%f) {
 ; CHECK-GI-NEXT:adrp x8, .LCPI14_1
 ; CHECK-GI-NEXT:str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI14_1]
-; CHECK-GI-NEXT:bl __getf2
+; CHECK-GI-NEXT:bl __gttf2
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:cmp w0, #0
 ; CHECK-GI-NEXT:mov x9, #-4603241769126068224 // =0xc01e000000000000
 ; CHECK-GI-NEXT:fmov x8, d0
-; CHECK-GI-NEXT:csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:mov x8, v0.d[1]
 ; CHECK-GI-NEXT:mov v0.d[0], x19
-; CHECK-GI-NEXT:csel x20, x8, x9, lt
+; CHECK-GI-NEXT:csel x20, x8, x9, gt
 ; CHECK-GI-NEXT:adrp x8, .LCPI14_0
 ; CHECK-GI-NEXT:mov v0.d[1], x20
 ; CHECK-GI-NEXT:ldr q1, [x8, :

[llvm-branch-commits] [llvm] release/20.x: [GlobalISel][AArch64] Fix fptoi.sat lowering. (#127901) (PR #128001)

2025-02-20 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-globalisel

Author: None (llvmbot)


Changes

Backport 70ed381b1693697dec3efcaed161d3626d16cff1

Requested by: @davemgreen

---

Patch is 34.38 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/128001.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll (+6-6) 
- (modified) llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll (+72-72) 
- (modified) llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll (+6-6) 
- (modified) llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll (+72-72) 


``diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp 
b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d0a62340a5f32..536c193d52080 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7801,13 +7801,13 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
   if (AreExactFloatBounds) {
 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
-auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_ULT,
+auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
  SrcTy.changeElementSize(1), Src, MaxC);
 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
 // Clamp by MaxFloat from above. NaN cannot occur.
 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
 auto MinP =
-MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), 
Max,
+MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), 
Max,
  MinC, MachineInstr::FmNoNans);
 auto Min =
 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll 
b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index bfb5c67801e6c..39e2db3a52d2c 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -987,25 +987,25 @@ define i32 @test_signed_f128_i32(fp128 %f) {
 ; CHECK-GI-NEXT:adrp x8, .LCPI30_1
 ; CHECK-GI-NEXT:str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI30_1]
-; CHECK-GI-NEXT:bl __getf2
+; CHECK-GI-NEXT:bl __gttf2
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:cmp w0, #0
 ; CHECK-GI-NEXT:mov x9, #-4603241769126068224 // =0xc01e000000000000
 ; CHECK-GI-NEXT:fmov x8, d0
-; CHECK-GI-NEXT:csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:mov x8, v0.d[1]
 ; CHECK-GI-NEXT:mov v0.d[0], x19
-; CHECK-GI-NEXT:csel x20, x8, x9, lt
+; CHECK-GI-NEXT:csel x20, x8, x9, gt
 ; CHECK-GI-NEXT:adrp x8, .LCPI30_0
 ; CHECK-GI-NEXT:mov v0.d[1], x20
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT:bl __gttf2
+; CHECK-GI-NEXT:bl __lttf2
 ; CHECK-GI-NEXT:cmp w0, #0
-; CHECK-GI-NEXT:csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:mov v0.d[0], x8
 ; CHECK-GI-NEXT:mov x8, #281474976448512 // =0xfffffffc0000
 ; CHECK-GI-NEXT:movk x8, #16413, lsl #48
-; CHECK-GI-NEXT:csel x8, x20, x8, gt
+; CHECK-GI-NEXT:csel x8, x20, x8, lt
 ; CHECK-GI-NEXT:mov v0.d[1], x8
 ; CHECK-GI-NEXT:bl __fixtfsi
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll 
b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index b2b3430f4d85e..67d625dd16473 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -535,25 +535,25 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> 
%f) {
 ; CHECK-GI-NEXT:adrp x8, .LCPI14_1
 ; CHECK-GI-NEXT:str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI14_1]
-; CHECK-GI-NEXT:bl __getf2
+; CHECK-GI-NEXT:bl __gttf2
 ; CHECK-GI-NEXT:ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:cmp w0, #0
 ; CHECK-GI-NEXT:mov x9, #-4603241769126068224 // =0xc01e000000000000
 ; CHECK-GI-NEXT:fmov x8, d0
-; CHECK-GI-NEXT:csel x19, x8, xzr, lt
+; CHECK-GI-NEXT:csel x19, x8, xzr, gt
 ; CHECK-GI-NEXT:mov x8, v0.d[1]
 ; CHECK-GI-NEXT:mov v0.d[0], x19
-; CHECK-GI-NEXT:csel x20, x8, x9, lt
+; CHECK-GI-NEXT:csel x20, x8, x9, gt
 ; CHECK-GI-NEXT:adrp x8, .LCPI14_0
 ; CHECK-GI-NEXT:mov v0.d[1], x20
 ; CHECK-GI-NEXT:ldr q1, [x8, :lo12:.LCPI14_0]
-; CHECK-GI-NEXT:bl __gttf2
+; CHECK-GI-NEXT:bl __lttf2
 ; CHECK-GI-NEXT:cmp w0, #0
-; CHECK-GI-NEXT:csel x8, x19, xzr, gt
+; CHECK-GI-NEXT:csel x8, x19, xzr, lt
 ; CHECK-GI-NEXT:mov v0.d[0], x8
 ; CHECK-GI-NEXT:mov x8, #281474976448512 // =0xfffffffc0000

[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-20 Thread via llvm-branch-commits


@@ -5294,10 +5294,14 @@ def err_template_missing_args : Error<
   "%select{class template|function template|variable template|alias template|"
   "template template parameter|concept|template}0 %1 requires template "
   "arguments">;
-def err_template_arg_list_different_arity : Error<
-  "%select{too few|too many}0 template arguments for "
+def err_template_param_missing_arg : Error<
+  "missing template argument for template parameter">;
+def err_template_template_param_missing_param : Error<
+  "missing template parameter to bind to template template parameter">;

cor3ntin wrote:

The formulation of this is a bit weird.
Maybe: "template template argument 'foo' incompatible with 'bar': template 
parameter list arity mismatch"

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-20 Thread via llvm-branch-commits


@@ -12822,6 +12826,9 @@ class Sema final : public SemaBase {
 
   /// We are performing partial ordering for template template parameters.
   PartialOrderingTTP,
+
+  /// Checking a Template Parameter

cor3ntin wrote:

This could use a more descriptive comment

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement instantiation context note for checking template parameters (PR #126088)

2025-02-20 Thread via llvm-branch-commits


@@ -11802,9 +11817,10 @@ class Sema final : public SemaBase {
  bool PartialOrdering,
  bool *StrictPackMatch);
 
+  SmallString<128> toTerseString(const NamedDecl &D) const;

cor3ntin wrote:

Given this is used 2 times, I wonder if it's actually useful, especially in 
Sema.
If we really think we need that, I'd rather have it be a new overload of 
Decl::print, or `Decl::printTerse` or something like that

https://github.com/llvm/llvm-project/pull/126088
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


  1   2   >