[llvm-branch-commits] AArch64: Emit PAuth ifuncs into the same comdat as the containing global. (PR #170944)

2025-12-08 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer approved this pull request.


https://github.com/llvm/llvm-project/pull/170944


[llvm-branch-commits] [clang-tools-extra] [clang-tidy] add abseil-unchecked-statusor-access (PR #171188)

2025-12-08 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer updated 
https://github.com/llvm/llvm-project/pull/171188

>From 3a7705624359678edaed5c7b9686cae034cb4bfd Mon Sep 17 00:00:00 2001
From: Florian Mayer 
Date: Mon, 8 Dec 2025 13:10:30 -0800
Subject: [PATCH 1/2] change

Created using spr 1.3.7
---
 .../clang-tidy/abseil/UncheckedStatusOrAccessCheck.h| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clang-tidy/abseil/UncheckedStatusOrAccessCheck.h 
b/clang-tools-extra/clang-tidy/abseil/UncheckedStatusOrAccessCheck.h
index cf47703f0a972..8fefee4691be6 100644
--- a/clang-tools-extra/clang-tidy/abseil/UncheckedStatusOrAccessCheck.h
+++ b/clang-tools-extra/clang-tidy/abseil/UncheckedStatusOrAccessCheck.h
@@ -10,7 +10,7 @@ namespace clang::tidy::abseil {
 // assuring that it contains a value.
 //
 // For details on the dataflow analysis implemented in this check see:
-// http://google3/devtools/cymbal/nullability/statusor
+// clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp
 class UncheckedStatusOrAccessCheck : public ClangTidyCheck {
 public:
   using ClangTidyCheck::ClangTidyCheck;

>From d020c4b77da52906b21b382650e664439c3aaa65 Mon Sep 17 00:00:00 2001
From: Florian Mayer 
Date: Mon, 8 Dec 2025 17:49:15 -0800
Subject: [PATCH 2/2] test

Created using spr 1.3.7
---
 .../abseil/Inputs/absl/meta/type_traits.h |  46 ++
 .../abseil/Inputs/absl/status/status.h|  69 +++
 .../abseil/Inputs/absl/status/statusor.h  | 346 ++
 .../checkers/abseil/Inputs/cstddef.h  |  10 +
 .../checkers/abseil/Inputs/initializer_list   |  11 +
 .../checkers/abseil/Inputs/type_traits| 427 ++
 .../abseil-unchecked-statusor-access.cpp  | 138 ++
 7 files changed, 1047 insertions(+)
 create mode 100644 
clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/meta/type_traits.h
 create mode 100644 
clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/status/status.h
 create mode 100644 
clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/status/statusor.h
 create mode 100644 
clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/cstddef.h
 create mode 100644 
clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/initializer_list
 create mode 100644 
clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/type_traits
 create mode 100644 
clang-tools-extra/test/clang-tidy/checkers/abseil/abseil-unchecked-statusor-access.cpp

diff --git 
a/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/meta/type_traits.h
 
b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/meta/type_traits.h
new file mode 100644
index 0..06ce61dbcc1e7
--- /dev/null
+++ 
b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/meta/type_traits.h
@@ -0,0 +1,46 @@
+#include <type_traits>
+
+namespace absl {
+
+template <typename... Ts>
+struct conjunction : std::true_type {};
+
+template <typename T, typename... Ts>
+struct conjunction<T, Ts...>
+    : std::conditional<T::value, conjunction<Ts...>, T>::type {};
+
+template <typename T>
+struct conjunction<T> : T {};
+
+template <typename... Ts>
+struct disjunction : std::false_type {};
+
+template <typename T, typename... Ts>
+struct disjunction<T, Ts...>
+    : std::conditional<T::value, T, disjunction<Ts...>>::type {};
+
+template <typename T>
+struct disjunction<T> : T {};
+
+template <typename T>
+struct negation : std::integral_constant<bool, !T::value> {};
+
+template <bool B, typename T = void>
+using enable_if_t = typename std::enable_if<B, T>::type;
+
+
+template <bool B, typename T, typename F>
+using conditional_t = typename std::conditional<B, T, F>::type;
+
+template <typename T>
+using remove_cv_t = typename std::remove_cv<T>::type;
+
+template <typename T>
+using remove_reference_t = typename std::remove_reference<T>::type;
+
+template <typename T>
+using decay_t = typename std::decay<T>::type;
+
+using std::in_place;
+using std::in_place_t;
+} // namespace absl
diff --git 
a/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/status/status.h 
b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/status/status.h
new file mode 100644
index 0..fd0910e81436a
--- /dev/null
+++ 
b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/status/status.h
@@ -0,0 +1,69 @@
+namespace absl {
+struct SourceLocation {
+  static constexpr SourceLocation current();
+  static constexpr SourceLocation
+  DoNotInvokeDirectlyNoSeriouslyDont(int line, const char *file_name);
+};
+} // namespace absl
+namespace absl {
+enum class StatusCode : int {
+  kOk,
+  kCancelled,
+  kUnknown,
+  kInvalidArgument,
+  kDeadlineExceeded,
+  kNotFound,
+  kAlreadyExists,
+  kPermissionDenied,
+  kResourceExhausted,
+  kFailedPrecondition,
+  kAborted,
+  kOutOfRange,
+  kUnimplemented,
+  kInternal,
+  kUnavailable,
+  kDataLoss,
+  kUnauthenticated,
+};
+} // namespace absl
+
+namespace absl {
+enum class StatusToStringMode : int {
+  kWithNoExtraData = 0,
+  kWithPayload = 1 << 0,
+  kWithSourceLocation = 1 << 1,
+  kWithEverything = ~kWithNoExtraData,
+  kDefault = kWithPayload,
+};
+class Status {
+public:
+  Status();
+  Status(const Status &base_status, absl::SourceLocation loc);
+  Status(Status &&base_status, absl::SourceLocation loc);
+  ~Status() {}
+
+  Status(const St

[llvm-branch-commits] [clang-tools-extra] [clang-tidy] add abseil-unchecked-statusor-access (PR #171188)

2025-12-08 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer edited 
https://github.com/llvm/llvm-project/pull/171188


[llvm-branch-commits] [clang-tools-extra] [clang-tidy] add abseil-unchecked-statusor-access (PR #171188)

2025-12-08 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer ready_for_review 
https://github.com/llvm/llvm-project/pull/171188


[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)

2025-12-08 Thread Stefan Gränitz via llvm-branch-commits




weliveindetail wrote:

That's fair

https://github.com/llvm/llvm-project/pull/170846


[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)

2025-12-08 Thread Gang Chen via llvm-branch-commits


@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 
-amdgpu-enable-machine-level-inliner < %s | FileCheck %s
+
+declare !callback !0 i32 @llvm.amdgcn.call.whole.wave.i32.p0(ptr, ...)
+
+define amdgpu_cs void @inline_simple_wwf(i32 %input, ptr addrspace(1) %output) 
{
+; CHECK-LABEL: inline_simple_wwf:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_mov_b32 s1, simple_whole_wave_func@abs32@hi
+; CHECK-NEXT:s_mov_b32 s0, simple_whole_wave_func@abs32@lo
+; CHECK-NEXT:s_mov_b32 s32, 0
+; CHECK-NEXT:v_dual_mov_b32 v41, v2 :: v_dual_mov_b32 v40, v1
+; CHECK-NEXT:s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT:global_store_b32 v[40:41], v0, off
+; CHECK-NEXT:s_endpgm
+  %result = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr 
@simple_whole_wave_func, i32 %input)
+  store i32 %result, ptr addrspace(1) %output
+  ret void
+}
+
+define amdgpu_gfx_whole_wave i32 @simple_whole_wave_func(i1 %active, i32 %x) {
+  %result = add i32 %x, 42
+  ret i32 %result
+}
+
+define amdgpu_gfx_whole_wave i32 @another_whole_wave_func(i1 %active, i32 %a, 
i32 %b) {
+  %sum = add i32 %a, %b
+  %result = mul i32 %sum, 2
+  ret i32 %result
+}
+
+define amdgpu_cs void @inline_multiple_wwf(i32 %x, i32 %y, ptr addrspace(1) 
%out1, ptr addrspace(1) %out2) {
+; CHECK-LABEL: inline_multiple_wwf:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_mov_b32 s1, simple_whole_wave_func@abs32@hi
+; CHECK-NEXT:s_mov_b32 s0, simple_whole_wave_func@abs32@lo
+; CHECK-NEXT:s_mov_b32 s32, 0
+; CHECK-NEXT:v_dual_mov_b32 v41, v5 :: v_dual_mov_b32 v44, v0
+; CHECK-NEXT:v_dual_mov_b32 v40, v4 :: v_dual_mov_b32 v43, v3
+; CHECK-NEXT:v_dual_mov_b32 v42, v2 :: v_dual_mov_b32 v45, v1
+; CHECK-NEXT:s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT:s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT:v_dual_mov_b32 v46, v0 :: v_dual_mov_b32 v1, v45
+; CHECK-NEXT:v_mov_b32_e32 v0, v44
+; CHECK-NEXT:s_mov_b32 s1, another_whole_wave_func@abs32@hi
+; CHECK-NEXT:s_mov_b32 s0, another_whole_wave_func@abs32@lo
+; CHECK-NEXT:s_wait_alu 0xfffe
+; CHECK-NEXT:s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT:global_store_b32 v[42:43], v46, off
+; CHECK-NEXT:global_store_b32 v[40:41], v0, off
+; CHECK-NEXT:s_endpgm
+  %result1 = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr 
@simple_whole_wave_func, i32 %x)
+  %result2 = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr 
@another_whole_wave_func, i32 %x, i32 %y)
+  store i32 %result1, ptr addrspace(1) %out1
+  store i32 %result2, ptr addrspace(1) %out2
+  ret void
+}
+

cmc-rep wrote:

Could we also have a test where one WWF is called by multiple kernels? I 
believe it should work, but having a test like that would still be helpful. 

https://github.com/llvm/llvm-project/pull/169476


[llvm-branch-commits] [clang] [compiler-rt] [llvm] release/21.x: [SPARC] Remove CCIfConsecutiveRegs for f128 returns (#170133) (PR #170580)

2025-12-08 Thread Brad Smith via llvm-branch-commits

brad0 wrote:

@efriedma-quic Ping.

https://github.com/llvm/llvm-project/pull/170580


[llvm-branch-commits] [llvm] [ADT] Make use of subsetOf and anyCommon methods of BitVector (NFC) (PR #170876)

2025-12-08 Thread Jakub Kuderski via llvm-branch-commits

https://github.com/kuhar approved this pull request.


https://github.com/llvm/llvm-project/pull/170876


[llvm-branch-commits] [llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)

2025-12-08 Thread Peter Waller via llvm-branch-commits


@@ -0,0 +1,46 @@
+# This test checks that BOLT can generate BTI landing pads for targets of 
stubs inserted in LongJmp.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %s %cflags -Wl,-q -o %t -mbranch-protection=bti -Wl,-z,force-bti
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions \
+# RUN: --print-split --print-only foo --print-longjmp 2>&1 | FileCheck %s
+
+#CHECK: BOLT-INFO: Starting stub-insertion pass
+#CHECK: Binary Function "foo" after long-jmp
+
+#CHECK:  cmp x0, #0x0
+#CHECK-NEXT: Successors: .LStub0
+
+#CHECK:  adrpx16, .Ltmp0
+#CHECK-NEXT: add x16, x16, :lo12:.Ltmp0
+#CHECK-NEXT: br  x16 # UNKNOWN CONTROL FLOW
+
+#CHECK: ---   HOT-COLD SPLIT POINT   ---
+
+#CHECK:  bti c
+#CHECK-NEXT: mov x0, #0x2
+#CHECK-NEXT: ret

peterwaller-arm wrote:

Consistency: all of the above should begin with `# CHECK`.

https://github.com/llvm/llvm-project/pull/171149


[llvm-branch-commits] [llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)

2025-12-08 Thread Peter Waller via llvm-branch-commits


@@ -0,0 +1,35 @@
+# This test checks the situation where LongJmp adds a stub targeting an 
ignored (skipped) function.
+# The problem is that by default BOLT cannot modify ignored functions, so it 
cannot add the needed BTI.
+
+# Current behaviour is to emit an error.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN:   -mattr=+bti -aarch64-mark-bti-property %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q -Wl,-z,force-bti
+# RUN: not llvm-bolt %t.exe -o %t.bolt \
+# RUN:   --align-text=0x1000 --skip-funcs=far_away_func 2>&1 | FileCheck %s
+
+# CHECK: BOLT-ERROR: Cannot add BTI landing pad to ignored function 
far_away_func
+
+  .section .text
+  .global _start
+  .global far_away_func
+
+  .align 4
+  .global _start
+  .type _start, %function
+_start:
+bti c
+bl far_away_func
+ret
+
+  .global far_away_func

peterwaller-arm wrote:

Worth a short comment stating that the function ends up far away once BOLT 
has run because it is skipped, and why? 

https://github.com/llvm/llvm-project/pull/171149


[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)

2025-12-08 Thread Gang Chen via llvm-branch-commits


@@ -0,0 +1,262 @@
+//===-- AMDGPUMachineLevelInliner.cpp - AMDGPU Machine Level Inliner ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMachineLevelInliner.h"
+#include "AMDGPU.h"
+#include "AMDGPUMachineModuleInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManagers.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-machine-level-inliner"
+
+namespace {
+class AMDGPUInliningPassManager : public FPPassManager {
+public:
+  static char ID;
+
+  explicit AMDGPUInliningPassManager() : FPPassManager(ID) {}
+
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool doFinalization(Module &M) override;
+
+  StringRef getPassName() const override {
+return "AMDGPU Inlining Pass Manager";
+  }
+};
+
+/// AMDGPUInliningAnchor - A machine function pass that serves as an anchor for
+/// setting up the AMDGPU inlining pass manager infrastructure. It makes sure
+/// the inliner is run via an AMDGPUInliningPassManager. It can be run well in
+/// advance of the inliner as long as there are only FunctionPasses in between.
+class AMDGPUInliningAnchor : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification
+
+  AMDGPUInliningAnchor() : MachineFunctionPass(ID) {}
+
+  // We don't really need to process any functions here.
+  bool runOnMachineFunction(MachineFunction &MF) override { return false; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  StringRef getPassName() const override;
+
+  /// Prepare the pass manager stack for the inliner. This will push an
+  /// `AMDGPUInliningPassManager` onto the stack.
+  void preparePassManager(PMStack &Stack) override;
+};
+
+} // end anonymous namespace.
+
+// Pass identification
+char AMDGPUMachineLevelInliner::ID = 0;
+char AMDGPUInliningPassManager::ID = 0;
+char AMDGPUInliningAnchor::ID = 0;
+
+char &llvm::AMDGPUMachineLevelInlinerID = AMDGPUMachineLevelInliner::ID;
+char &llvm::AMDGPUInliningAnchorID = AMDGPUInliningAnchor::ID;
+
+INITIALIZE_PASS_BEGIN(AMDGPUMachineLevelInliner, DEBUG_TYPE,
+  "AMDGPU Machine Level Inliner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AMDGPUInliningAnchor)
+INITIALIZE_PASS_END(AMDGPUMachineLevelInliner, DEBUG_TYPE,
+"AMDGPU Machine Level Inliner", false, false)
+
+INITIALIZE_PASS_BEGIN(AMDGPUInliningAnchor, "amdgpu-inlining-anchor",
+  "AMDGPU Inlining Anchor", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(AMDGPUInliningAnchor, "amdgpu-inlining-anchor",
+"AMDGPU Inlining Anchor", false, true)
+
+AMDGPUMachineLevelInliner::AMDGPUMachineLevelInliner()
+: MachineFunctionPass(ID) {
+  initializeAMDGPUMachineLevelInlinerPass(*PassRegistry::getPassRegistry());
+}
+
+void AMDGPUMachineLevelInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineModuleInfoWrapperPass>();
+  AU.addRequired<AMDGPUInliningAnchor>();
+  AU.addPreserved<MachineModuleInfoWrapperPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool AMDGPUMachineLevelInliner::runOnMachineFunction(MachineFunction &MF) {
+  MachineModuleInfo &MMI =
+      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+  Function &F = MF.getFunction();
+  if (shouldInlineCallsTo(F)) {

cmc-rep wrote:

Suggestion: perhaps we should say "mayInlineCallsTo(F)". I am thinking about 
a broader use of the InlinePassManager.

https://github.com/llvm/llvm-project/pull/169476


[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)

2025-12-08 Thread Gang Chen via llvm-branch-commits


@@ -0,0 +1,262 @@
+//===-- AMDGPUMachineLevelInliner.cpp - AMDGPU Machine Level Inliner ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMachineLevelInliner.h"
+#include "AMDGPU.h"
+#include "AMDGPUMachineModuleInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManagers.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-machine-level-inliner"
+
+namespace {
+class AMDGPUInliningPassManager : public FPPassManager {
+public:
+  static char ID;
+
+  explicit AMDGPUInliningPassManager() : FPPassManager(ID) {}
+
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool doFinalization(Module &M) override;
+
+  StringRef getPassName() const override {
+return "AMDGPU Inlining Pass Manager";
+  }
+};
+
+/// AMDGPUInliningAnchor - A machine function pass that serves as an anchor for
+/// setting up the AMDGPU inlining pass manager infrastructure. It makes sure
+/// the inliner is run via an AMDGPUInliningPassManager. It can be run well in
+/// advance of the inliner as long as there are only FunctionPasses in between.
+class AMDGPUInliningAnchor : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification
+
+  AMDGPUInliningAnchor() : MachineFunctionPass(ID) {}
+
+  // We don't really need to process any functions here.
+  bool runOnMachineFunction(MachineFunction &MF) override { return false; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  StringRef getPassName() const override;
+
+  /// Prepare the pass manager stack for the inliner. This will push an
+  /// `AMDGPUInliningPassManager` onto the stack.
+  void preparePassManager(PMStack &Stack) override;
+};
+
+} // end anonymous namespace.
+
+// Pass identification
+char AMDGPUMachineLevelInliner::ID = 0;
+char AMDGPUInliningPassManager::ID = 0;
+char AMDGPUInliningAnchor::ID = 0;
+
+char &llvm::AMDGPUMachineLevelInlinerID = AMDGPUMachineLevelInliner::ID;
+char &llvm::AMDGPUInliningAnchorID = AMDGPUInliningAnchor::ID;
+
+INITIALIZE_PASS_BEGIN(AMDGPUMachineLevelInliner, DEBUG_TYPE,
+  "AMDGPU Machine Level Inliner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AMDGPUInliningAnchor)
+INITIALIZE_PASS_END(AMDGPUMachineLevelInliner, DEBUG_TYPE,
+"AMDGPU Machine Level Inliner", false, false)
+
+INITIALIZE_PASS_BEGIN(AMDGPUInliningAnchor, "amdgpu-inlining-anchor",
+  "AMDGPU Inlining Anchor", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(AMDGPUInliningAnchor, "amdgpu-inlining-anchor",
+"AMDGPU Inlining Anchor", false, true)
+
+AMDGPUMachineLevelInliner::AMDGPUMachineLevelInliner()
+: MachineFunctionPass(ID) {
+  initializeAMDGPUMachineLevelInlinerPass(*PassRegistry::getPassRegistry());
+}
+
+void AMDGPUMachineLevelInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineModuleInfoWrapperPass>();
+  AU.addRequired<AMDGPUInliningAnchor>();
+  AU.addPreserved<MachineModuleInfoWrapperPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool AMDGPUMachineLevelInliner::runOnMachineFunction(MachineFunction &MF) {
+  MachineModuleInfo &MMI =
+      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+  Function &F = MF.getFunction();
+  if (shouldInlineCallsTo(F)) {
+    // Mark the function as machine-inlined in AMDGPUMachineModuleInfo. This
+    // tells the inlining pass manager to stop processing it.
+    auto &AMMMI = MMI.getObjFileInfo<AMDGPUMachineModuleInfo>();
+    AMMMI.addMachineInlinedFunction(F);

cmc-rep wrote:

Maybe the name could be "addMachineInliningCandidate(F)"? Again, I am 
thinking about a potentially broader use of the InlinePassManager.

https://github.com/llvm/llvm-project/pull/169476


[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)

2025-12-08 Thread Gang Chen via llvm-branch-commits

https://github.com/cmc-rep edited 
https://github.com/llvm/llvm-project/pull/169476


[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)

2025-12-08 Thread Gang Chen via llvm-branch-commits

https://github.com/cmc-rep edited 
https://github.com/llvm/llvm-project/pull/169476


[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)

2025-12-08 Thread Alexis Engelke via llvm-branch-commits




aengelke wrote:

We don't provide any API/ABI stability across LLVM versions, so APIVersion 
seems rather useless in practice. If there is an APIVersion mismatch, the 
plugin was built against an incompatible LLVM version anyway...

Fixing this properly would require no longer returning the info struct by 
value, which in turn would require changes for all users; I wanted to avoid 
that.
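
For reference, the entry point that returns the info struct by value is the
documented `llvmGetPassPluginInfo` hook; a minimal sketch (the plugin name is
hypothetical):

extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo
llvmGetPassPluginInfo() {
  return {LLVM_PLUGIN_API_VERSION, "MyPlugin", LLVM_VERSION_STRING,
          [](llvm::PassBuilder &PB) { /* register pass callbacks here */ }};
}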

https://github.com/llvm/llvm-project/pull/170846


[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)

2025-12-08 Thread Alexis Engelke via llvm-branch-commits


@@ -1017,16 +1018,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
 }
 #endif
   }
-  // Attempt to load pass plugins and register their callbacks with PB.
-  for (auto &PluginFN : CodeGenOpts.PassPlugins) {
-auto PassPlugin = PassPlugin::Load(PluginFN);
-if (PassPlugin) {
-  PassPlugin->registerPassBuilderCallbacks(PB);
-} else {
-  Diags.Report(diag::err_fe_unable_to_load_plugin)
-  << PluginFN << toString(PassPlugin.takeError());
-}
-  }
+  // Register plugin callbacks with PB.
+  for (auto &Plugin : Plugins)
+Plugin.registerPassBuilderCallbacks(PB);

aengelke wrote:

*Need* no, they continue to work just fine.

- LTO: yes, might be good to have at some point in the future.
- Flang: likewise.
- clang-linker-wrapper: if there's interest, this can be added. While it 
currently seems to load plugins, it doesn't seem to call any methods on them.
- MLIR ModuleToObject::translateToISA also only seems to be used for GPUs and 
MLIR doesn't do anything related to plugins right now.

opt doesn't call back-ends, and lld doesn't call them directly.

https://github.com/llvm/llvm-project/pull/170846


[llvm-branch-commits] [llvm] [ADT] Make use of subsetOf and anyCommon methods of BitVector (NFC) (PR #170876)

2025-12-08 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/170876

>From 52dbe766a39af89c0cc198730b3933e679733bed Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Fri, 5 Dec 2025 16:08:28 +0300
Subject: [PATCH] [ADT] Make use of subsetOf and anyCommon methods of BitVector
 (NFC)

Replace the code along these lines

BitVector Tmp = LHS;
Tmp &= RHS;
return Tmp.any();

and

BitVector Tmp = LHS;
Tmp.reset(RHS);
return Tmp.none();

with `LHS.anyCommon(RHS)` and `LHS.subsetOf(RHS)`, respectively, which
do not require creating a temporary BitVector and can return early.
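
For illustration, a minimal sketch of the replacements, assuming LHS and RHS
are llvm::BitVector values:

bool Overlaps = LHS.anyCommon(RHS);  // a bit is set in both LHS and RHS
bool Contained = LHS.subsetOf(RHS);  // every set bit of LHS is also set in RHS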
---
 bolt/include/bolt/Passes/LivenessAnalysis.h   |  5 ++---
 bolt/include/bolt/Passes/ReachingDefOrUse.h   |  3 +--
 bolt/lib/Passes/RegReAssign.cpp   |  8 ++--
 bolt/lib/Passes/ShrinkWrapping.cpp| 10 --
 bolt/lib/Passes/StackAvailableExpressions.cpp |  3 +--
 bolt/lib/Passes/TailDuplication.cpp   |  8 
 llvm/lib/CodeGen/RDFRegisters.cpp |  6 ++
 llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp |  6 ++
 8 files changed, 18 insertions(+), 31 deletions(-)

diff --git a/bolt/include/bolt/Passes/LivenessAnalysis.h 
b/bolt/include/bolt/Passes/LivenessAnalysis.h
index 1df1113318d0b..f4faa1dc34ecd 100644
--- a/bolt/include/bolt/Passes/LivenessAnalysis.h
+++ b/bolt/include/bolt/Passes/LivenessAnalysis.h
@@ -37,10 +37,9 @@ class LivenessAnalysis : public DataflowAnalysis
-    BitVector BV = (*this->getStateAt(PP));
+    const BitVector &BV = *this->getStateAt(PP);
     const BitVector &RegAliases = BC.MIB->getAliases(Reg);
-    BV &= RegAliases;
-    return BV.any();
+    return BV.anyCommon(RegAliases);
   }
 
   void run() { Parent::run(); }
diff --git a/bolt/include/bolt/Passes/ReachingDefOrUse.h 
b/bolt/include/bolt/Passes/ReachingDefOrUse.h
index 585d673e3b84e..41a6091aad4cb 100644
--- a/bolt/include/bolt/Passes/ReachingDefOrUse.h
+++ b/bolt/include/bolt/Passes/ReachingDefOrUse.h
@@ -133,8 +133,7 @@ class ReachingDefOrUse
   RA.getInstClobberList(Point, Regs);
 else
   RA.getInstUsedRegsList(Point, Regs, false);
-Regs &= this->BC.MIB->getAliases(*TrackingReg);
-if (Regs.any())
+if (Regs.anyCommon(this->BC.MIB->getAliases(*TrackingReg)))
   Next.set(this->ExprToIdx[&Point]);
   }
 }
diff --git a/bolt/lib/Passes/RegReAssign.cpp b/bolt/lib/Passes/RegReAssign.cpp
index 0859cd244ce40..54eff51bfee68 100644
--- a/bolt/lib/Passes/RegReAssign.cpp
+++ b/bolt/lib/Passes/RegReAssign.cpp
@@ -316,18 +316,14 @@ void 
RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) {
   break;
 }
 
-BitVector AnyAliasAlive = AliveAtStart;
-AnyAliasAlive &= BC.MIB->getAliases(ClassicReg);
-if (AnyAliasAlive.any()) {
+if (AliveAtStart.anyCommon(BC.MIB->getAliases(ClassicReg))) {
   LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
 << " with " << BC.MRI->getName(ExtReg)
 << " because classic reg is alive\n");
   --End;
   continue;
 }
-AnyAliasAlive = AliveAtStart;
-AnyAliasAlive &= BC.MIB->getAliases(ExtReg);
-if (AnyAliasAlive.any()) {
+if (AliveAtStart.anyCommon(BC.MIB->getAliases(ExtReg))) {
   LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
 << " with " << BC.MRI->getName(ExtReg)
 << " because extended reg is alive\n");
diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp 
b/bolt/lib/Passes/ShrinkWrapping.cpp
index fe342ccd38a67..b882e2512866d 100644
--- a/bolt/lib/Passes/ShrinkWrapping.cpp
+++ b/bolt/lib/Passes/ShrinkWrapping.cpp
@@ -1100,9 +1100,8 @@ SmallVector 
ShrinkWrapping::fixPopsPlacements(
 bool Found = false;
 if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
 SaveOffset) {
-  BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
-  BV &= UsesByReg[CSR];
-  if (!BV.any()) {
+  const BitVector &BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
+  if (!BV.anyCommon(UsesByReg[CSR])) {
 Found = true;
 PP = BB;
 continue;
@@ -1110,9 +1109,8 @@ SmallVector 
ShrinkWrapping::fixPopsPlacements(
 }
 for (MCInst &Inst : llvm::reverse(*BB)) {
   if (SPT.getStateBefore(Inst)->first == SaveOffset) {
-BitVector BV = *RI.getStateAt(Inst);
-BV &= UsesByReg[CSR];
-if (!BV.any()) {
+const BitVector &BV = *RI.getStateAt(Inst);
+if (!BV.anyCommon(UsesByReg[CSR])) {
   Found = true;
   PP = &Inst;
   break;
diff --git a/bolt/lib/Passes/StackAvailableExpressions.cpp 
b/bolt/lib/Passes/StackAvailableExpressions.cpp
index a0d361f273de2..c685cc19badc3 100644
--- a/bolt/lib/Passes/StackAvailableExpressions.cpp
+++ b/bolt/lib/Passes/StackAvailableExpressions.cpp
@@ -103,8 +103,7 @@ bool StackAvailableExpressions::doesXKillsY(const MCInst 
*X

[llvm-branch-commits] [llvm] [ADT] Make use of subsetOf and anyCommon methods of BitVector (NFC) (PR #170876)

2025-12-08 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/170876

>From 24957f7595d80072554580edf18802aadc30c303 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Fri, 5 Dec 2025 16:08:28 +0300
Subject: [PATCH] [ADT] Make use of subsetOf and anyCommon methods of BitVector
 (NFC)

Replace the code along these lines

BitVector Tmp = LHS;
Tmp &= RHS;
return Tmp.any();

and

BitVector Tmp = LHS;
Tmp.reset(RHS);
return Tmp.none();

with `LHS.anyCommon(RHS)` and `LHS.subsetOf(RHS)`, correspondingly, which
do not require creating temporary BitVector and can return early.
---
 bolt/include/bolt/Passes/LivenessAnalysis.h   |  5 ++---
 bolt/include/bolt/Passes/ReachingDefOrUse.h   |  3 +--
 bolt/lib/Passes/RegReAssign.cpp   |  8 ++--
 bolt/lib/Passes/ShrinkWrapping.cpp| 10 --
 bolt/lib/Passes/StackAvailableExpressions.cpp |  3 +--
 bolt/lib/Passes/TailDuplication.cpp   |  8 
 llvm/lib/CodeGen/RDFRegisters.cpp |  6 ++
 llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp |  6 ++
 8 files changed, 18 insertions(+), 31 deletions(-)

diff --git a/bolt/include/bolt/Passes/LivenessAnalysis.h 
b/bolt/include/bolt/Passes/LivenessAnalysis.h
index 1df1113318d0b..f4faa1dc34ecd 100644
--- a/bolt/include/bolt/Passes/LivenessAnalysis.h
+++ b/bolt/include/bolt/Passes/LivenessAnalysis.h
@@ -37,10 +37,9 @@ class LivenessAnalysis : public 
DataflowAnalysisgetStateAt(PP));
+const BitVector &BV = *this->getStateAt(PP);
 const BitVector &RegAliases = BC.MIB->getAliases(Reg);
-BV &= RegAliases;
-return BV.any();
+return BV.anyCommon(RegAliases);
   }
 
   void run() { Parent::run(); }
diff --git a/bolt/include/bolt/Passes/ReachingDefOrUse.h 
b/bolt/include/bolt/Passes/ReachingDefOrUse.h
index 585d673e3b84e..41a6091aad4cb 100644
--- a/bolt/include/bolt/Passes/ReachingDefOrUse.h
+++ b/bolt/include/bolt/Passes/ReachingDefOrUse.h
@@ -133,8 +133,7 @@ class ReachingDefOrUse
   RA.getInstClobberList(Point, Regs);
 else
   RA.getInstUsedRegsList(Point, Regs, false);
-Regs &= this->BC.MIB->getAliases(*TrackingReg);
-if (Regs.any())
+if (Regs.anyCommon(this->BC.MIB->getAliases(*TrackingReg)))
   Next.set(this->ExprToIdx[&Point]);
   }
 }
diff --git a/bolt/lib/Passes/RegReAssign.cpp b/bolt/lib/Passes/RegReAssign.cpp
index 0859cd244ce40..54eff51bfee68 100644
--- a/bolt/lib/Passes/RegReAssign.cpp
+++ b/bolt/lib/Passes/RegReAssign.cpp
@@ -316,18 +316,14 @@ void 
RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) {
   break;
 }
 
-BitVector AnyAliasAlive = AliveAtStart;
-AnyAliasAlive &= BC.MIB->getAliases(ClassicReg);
-if (AnyAliasAlive.any()) {
+if (AliveAtStart.anyCommon(BC.MIB->getAliases(ClassicReg))) {
   LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
 << " with " << BC.MRI->getName(ExtReg)
 << " because classic reg is alive\n");
   --End;
   continue;
 }
-AnyAliasAlive = AliveAtStart;
-AnyAliasAlive &= BC.MIB->getAliases(ExtReg);
-if (AnyAliasAlive.any()) {
+if (AliveAtStart.anyCommon(BC.MIB->getAliases(ExtReg))) {
   LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
 << " with " << BC.MRI->getName(ExtReg)
 << " because extended reg is alive\n");
diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp 
b/bolt/lib/Passes/ShrinkWrapping.cpp
index fe342ccd38a67..b882e2512866d 100644
--- a/bolt/lib/Passes/ShrinkWrapping.cpp
+++ b/bolt/lib/Passes/ShrinkWrapping.cpp
@@ -1100,9 +1100,8 @@ SmallVector 
ShrinkWrapping::fixPopsPlacements(
 bool Found = false;
 if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
 SaveOffset) {
-  BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
-  BV &= UsesByReg[CSR];
-  if (!BV.any()) {
+  const BitVector &BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
+  if (!BV.anyCommon(UsesByReg[CSR])) {
 Found = true;
 PP = BB;
 continue;
@@ -1110,9 +1109,8 @@ SmallVector 
ShrinkWrapping::fixPopsPlacements(
 }
 for (MCInst &Inst : llvm::reverse(*BB)) {
   if (SPT.getStateBefore(Inst)->first == SaveOffset) {
-BitVector BV = *RI.getStateAt(Inst);
-BV &= UsesByReg[CSR];
-if (!BV.any()) {
+const BitVector &BV = *RI.getStateAt(Inst);
+if (!BV.anyCommon(UsesByReg[CSR])) {
   Found = true;
   PP = &Inst;
   break;
diff --git a/bolt/lib/Passes/StackAvailableExpressions.cpp 
b/bolt/lib/Passes/StackAvailableExpressions.cpp
index a0d361f273de2..c685cc19badc3 100644
--- a/bolt/lib/Passes/StackAvailableExpressions.cpp
+++ b/bolt/lib/Passes/StackAvailableExpressions.cpp
@@ -103,8 +103,7 @@ bool StackAvailableExpressions::doesXKillsY(const MCInst 
*X

[llvm-branch-commits] [llvm] [ADT] Make use of subsetOf and anyCommon methods of BitVector (NFC) (PR #170876)

2025-12-08 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/170876

>From 24957f7595d80072554580edf18802aadc30c303 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Fri, 5 Dec 2025 16:08:28 +0300
Subject: [PATCH] [ADT] Make use of subsetOf and anyCommon methods of BitVector
 (NFC)

Replace the code along these lines

BitVector Tmp = LHS;
Tmp &= RHS;
return Tmp.any();

and

BitVector Tmp = LHS;
Tmp.reset(RHS);
return Tmp.none();

with `LHS.anyCommon(RHS)` and `LHS.subsetOf(RHS)`, respectively, which
do not require creating a temporary BitVector and can return early.
---
 bolt/include/bolt/Passes/LivenessAnalysis.h   |  5 ++---
 bolt/include/bolt/Passes/ReachingDefOrUse.h   |  3 +--
 bolt/lib/Passes/RegReAssign.cpp   |  8 ++--
 bolt/lib/Passes/ShrinkWrapping.cpp| 10 --
 bolt/lib/Passes/StackAvailableExpressions.cpp |  3 +--
 bolt/lib/Passes/TailDuplication.cpp   |  8 
 llvm/lib/CodeGen/RDFRegisters.cpp |  6 ++
 llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp |  6 ++
 8 files changed, 18 insertions(+), 31 deletions(-)

diff --git a/bolt/include/bolt/Passes/LivenessAnalysis.h 
b/bolt/include/bolt/Passes/LivenessAnalysis.h
index 1df1113318d0b..f4faa1dc34ecd 100644
--- a/bolt/include/bolt/Passes/LivenessAnalysis.h
+++ b/bolt/include/bolt/Passes/LivenessAnalysis.h
@@ -37,10 +37,9 @@ class LivenessAnalysis : public DataflowAnalysis
-    BitVector BV = (*this->getStateAt(PP));
+    const BitVector &BV = *this->getStateAt(PP);
     const BitVector &RegAliases = BC.MIB->getAliases(Reg);
-    BV &= RegAliases;
-    return BV.any();
+    return BV.anyCommon(RegAliases);
   }
 
   void run() { Parent::run(); }
diff --git a/bolt/include/bolt/Passes/ReachingDefOrUse.h 
b/bolt/include/bolt/Passes/ReachingDefOrUse.h
index 585d673e3b84e..41a6091aad4cb 100644
--- a/bolt/include/bolt/Passes/ReachingDefOrUse.h
+++ b/bolt/include/bolt/Passes/ReachingDefOrUse.h
@@ -133,8 +133,7 @@ class ReachingDefOrUse
   RA.getInstClobberList(Point, Regs);
 else
   RA.getInstUsedRegsList(Point, Regs, false);
-Regs &= this->BC.MIB->getAliases(*TrackingReg);
-if (Regs.any())
+if (Regs.anyCommon(this->BC.MIB->getAliases(*TrackingReg)))
   Next.set(this->ExprToIdx[&Point]);
   }
 }
diff --git a/bolt/lib/Passes/RegReAssign.cpp b/bolt/lib/Passes/RegReAssign.cpp
index 0859cd244ce40..54eff51bfee68 100644
--- a/bolt/lib/Passes/RegReAssign.cpp
+++ b/bolt/lib/Passes/RegReAssign.cpp
@@ -316,18 +316,14 @@ void 
RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) {
   break;
 }
 
-BitVector AnyAliasAlive = AliveAtStart;
-AnyAliasAlive &= BC.MIB->getAliases(ClassicReg);
-if (AnyAliasAlive.any()) {
+if (AliveAtStart.anyCommon(BC.MIB->getAliases(ClassicReg))) {
   LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
 << " with " << BC.MRI->getName(ExtReg)
 << " because classic reg is alive\n");
   --End;
   continue;
 }
-AnyAliasAlive = AliveAtStart;
-AnyAliasAlive &= BC.MIB->getAliases(ExtReg);
-if (AnyAliasAlive.any()) {
+if (AliveAtStart.anyCommon(BC.MIB->getAliases(ExtReg))) {
   LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
 << " with " << BC.MRI->getName(ExtReg)
 << " because extended reg is alive\n");
diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp 
b/bolt/lib/Passes/ShrinkWrapping.cpp
index fe342ccd38a67..b882e2512866d 100644
--- a/bolt/lib/Passes/ShrinkWrapping.cpp
+++ b/bolt/lib/Passes/ShrinkWrapping.cpp
@@ -1100,9 +1100,8 @@ SmallVector 
ShrinkWrapping::fixPopsPlacements(
 bool Found = false;
 if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
 SaveOffset) {
-  BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
-  BV &= UsesByReg[CSR];
-  if (!BV.any()) {
+  const BitVector &BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
+  if (!BV.anyCommon(UsesByReg[CSR])) {
 Found = true;
 PP = BB;
 continue;
@@ -1110,9 +1109,8 @@ SmallVector 
ShrinkWrapping::fixPopsPlacements(
 }
 for (MCInst &Inst : llvm::reverse(*BB)) {
   if (SPT.getStateBefore(Inst)->first == SaveOffset) {
-BitVector BV = *RI.getStateAt(Inst);
-BV &= UsesByReg[CSR];
-if (!BV.any()) {
+const BitVector &BV = *RI.getStateAt(Inst);
+if (!BV.anyCommon(UsesByReg[CSR])) {
   Found = true;
   PP = &Inst;
   break;
diff --git a/bolt/lib/Passes/StackAvailableExpressions.cpp 
b/bolt/lib/Passes/StackAvailableExpressions.cpp
index a0d361f273de2..c685cc19badc3 100644
--- a/bolt/lib/Passes/StackAvailableExpressions.cpp
+++ b/bolt/lib/Passes/StackAvailableExpressions.cpp
@@ -103,8 +103,7 @@ bool StackAvailableExpressions::doesXKillsY(const MCInst 
*X

[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)

2025-12-08 Thread Stefan Gränitz via llvm-branch-commits

https://github.com/weliveindetail edited 
https://github.com/llvm/llvm-project/pull/170846


[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)

2025-12-08 Thread Stefan Gränitz via llvm-branch-commits


@@ -1017,16 +1018,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
 }
 #endif
   }
-  // Attempt to load pass plugins and register their callbacks with PB.
-  for (auto &PluginFN : CodeGenOpts.PassPlugins) {
-auto PassPlugin = PassPlugin::Load(PluginFN);
-if (PassPlugin) {
-  PassPlugin->registerPassBuilderCallbacks(PB);
-} else {
-  Diags.Report(diag::err_fe_unable_to_load_plugin)
-  << PluginFN << toString(PassPlugin.takeError());
-}
-  }
+  // Register plugin callbacks with PB.
+  for (auto &Plugin : Plugins)
+Plugin.registerPassBuilderCallbacks(PB);

weliveindetail wrote:

What about the other places in-tree that load plugins: Flang, 
clang-linker-wrapper, libLTO, opt and lld? Do they need adjustment?

https://github.com/llvm/llvm-project/pull/170846


[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)

2025-12-08 Thread Stefan Gränitz via llvm-branch-commits


@@ -49,7 +53,14 @@ struct PassPluginLibraryInfo {
 
   /// The callback for registering plugin passes with a \c PassBuilder
   /// instance
-  void (*RegisterPassBuilderCallbacks)(PassBuilder &);
+  void (*RegisterPassBuilderCallbacks)(PassBuilder &) = nullptr;
+
+  /// Callback called before running the back-end passes on the module. The
+  /// callback can generate code itself by writing the expected output to OS 
and
+  /// returning true to prevent the default pipeline and further plugin
+  /// callbacks from running.
+  bool (*PreCodeGenCallback)(Module &, TargetMachine &, CodeGenFileType,

weliveindetail wrote:

I like the idea of having pre/post opt/codegen callbacks. If we ever get the 
new pass manager into the codegen pipeline, that would help plugins handle 
the pipeline change. Could we generalize this into something like 
`registerPipelinePhaseCallbacks()`?
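
A purely hypothetical sketch of what such a generalization could look like
(none of these names exist in LLVM today):

enum class PipelinePhase { PreOpt, PostOpt, PreCodeGen, PostCodeGen };
// One registration hook instead of one function pointer per phase:
void (*RegisterPipelinePhaseCallbacks)(PipelinePhase,
                                       void (*Callback)(llvm::Module &)) = nullptr;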

https://github.com/llvm/llvm-project/pull/170846


[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)

2025-12-08 Thread Stefan Gränitz via llvm-branch-commits

https://github.com/weliveindetail commented:

Please note that this is the first-ever change to the pass-plugin API. It 
isn't a breaking change, but breaking changes might become necessary if we 
have to fix things down the line. This change will also affect lld as well as 
out-of-tree frontends like Rust and Swift.

https://github.com/llvm/llvm-project/pull/170846


[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)

2025-12-08 Thread Stefan Gränitz via llvm-branch-commits




weliveindetail wrote:

Should we handle "version 1" plugins explicitly here? The 
`PassPluginLibraryInfo` structs that plugins return today have one member 
fewer. Copying them into the "version 2" `Info` stack slot might pull in 
uninitialized memory for the newly introduced function pointer.
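
A hypothetical sketch of such explicit handling on the loader side (`Entry`
stands for the plugin's resolved `llvmGetPassPluginInfo`; `APIVersion` exists
today, and `PreCodeGenCallback` is the member this PR adds):

PassPluginLibraryInfo Info = Entry(); // copied by value, as today
if (Info.APIVersion < 2)
  Info.PreCodeGenCallback = nullptr;  // never read a member a v1 plugin did not set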

https://github.com/llvm/llvm-project/pull/170846


[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)

2025-12-08 Thread Alexis Engelke via llvm-branch-commits


@@ -49,7 +53,14 @@ struct PassPluginLibraryInfo {
 
   /// The callback for registering plugin passes with a \c PassBuilder
   /// instance
-  void (*RegisterPassBuilderCallbacks)(PassBuilder &);
+  void (*RegisterPassBuilderCallbacks)(PassBuilder &) = nullptr;
+
+  /// Callback called before running the back-end passes on the module. The
+  /// callback can generate code itself by writing the expected output to OS 
and
+  /// returning true to prevent the default pipeline and further plugin
+  /// callbacks from running.
+  bool (*PreCodeGenCallback)(Module &, TargetMachine &, CodeGenFileType,

aengelke wrote:

I would consider this to be out of scope for now. Back-end pipelines are 
currently target-specific, and any support for plugins modifying these 
pipelines is likely to need a lot of further discussion (especially as many 
back-end passes are not really self-contained). There was some discussion 
earlier this year about adding callbacks for targets to CodeGenPassBuilder, 
but it seems work on that has stalled. Additionally, the proposed plugin API 
already permits building and running a custom pass manager, so right now 
there would be no benefit in speculatively building a more generic API.

If there's a desire to add external hooks for CodeGenPassBuilder, I think 
this should be a separate function, similar to the existing 
RegisterPassBuilderCallbacks.

https://github.com/llvm/llvm-project/pull/170846


[llvm-branch-commits] [llvm] release/21.x: [llvm-rc] Don't interpret integer literals as octal numbers in rc.exe mode (#166915) (PR #167174)

2025-12-08 Thread via llvm-branch-commits

dyung wrote:

> Are we merging this in 21.x or should we close?

Sorry for the late reply, but it was decided we would not take this change in 
the 21.x release branch.

https://github.com/llvm/llvm-project/pull/167174


[llvm-branch-commits] [clang] release/21.x: [clang-format] Don't swap `(const override)` with QAS_Right (#167191) (PR #170966)

2025-12-08 Thread via llvm-branch-commits

dyung wrote:

Any idea how long this bug has existed?

https://github.com/llvm/llvm-project/pull/170966


[llvm-branch-commits] [llvm] 75437ec - Revert "[AMDGPU] Enable i8 GEP promotion for vector allocas (#166132)"

2025-12-08 Thread via llvm-branch-commits

Author: Jan Patrick Lehr
Date: 2025-12-08T08:55:16+01:00
New Revision: 75437ec7cf23b705b336d6432d77159f450e62cf

URL: 
https://github.com/llvm/llvm-project/commit/75437ec7cf23b705b336d6432d77159f450e62cf
DIFF: 
https://github.com/llvm/llvm-project/commit/75437ec7cf23b705b336d6432d77159f450e62cf.diff

LOG: Revert "[AMDGPU] Enable i8 GEP promotion for vector allocas (#166132)"

This reverts commit 6ec8c4351cfc1d0627d1633b02ea787bd29c77d8.

Added: 


Modified: 
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index bab76e87af40c..b79689c39ef84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -457,25 +457,10 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, 
AllocaInst *Alloca,
   const auto &VarOffset = VarOffsets.front();
   APInt OffsetQuot;
   APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
-  Value *Offset = VarOffset.first;
-  if (Rem != 0) {
-unsigned ElemSizeShift = Log2_64(VecElemSize);
-SimplifyQuery SQ(DL);
-SQ.CxtI = GEP;
-KnownBits KB = computeKnownBits(VarOffset.first, SQ);
-// Bail out if the index may point into the middle of an element.
-if (KB.countMinTrailingZeros() < ElemSizeShift)
-  return nullptr;
-
-Value *Scaled = Builder.CreateLShr(VarOffset.first, ElemSizeShift);
-if (Instruction *NewInst = dyn_cast<Instruction>(Scaled))
-  NewInsts.push_back(NewInst);
-
-Offset = Scaled;
-OffsetQuot = APInt(BW, 1);
-Rem = 0;
-  }
+  if (Rem != 0 || OffsetQuot.isZero())
+return nullptr;
 
+  Value *Offset = VarOffset.first;
   if (!isa(Offset->getType()))
 return nullptr;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll 
b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
index bcc61062640d2..76e1868b3c4b9 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
@@ -250,150 +250,6 @@ bb2:
   store i32 0, ptr addrspace(5) %extractelement
   ret void
 }
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(ptr %buffer, 
float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:[[ALLOCA:%.*]] = freeze <3 x float> poison
-; CHECK-NEXT:[[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT:[[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 0, i32 4
-; CHECK-NEXT:[[TMP1:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT:[[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float 
[[DATA]], i32 [[TMP1]]
-; CHECK-NEXT:store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
-; CHECK-NEXT:ret void
-;
-  %alloca = alloca <3 x float>, align 16, addrspace(5)
-  %vec = load <3 x float>, ptr %buffer
-  store <3 x float> %vec, ptr addrspace(5) %alloca
-  %index = select i1 %idx_sel, i32 0, i32 4
-  %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
-  store float %data, ptr addrspace(5) %elt, align 4
-  %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
-  store <3 x float> %updated, ptr %buffer, align 16
-  ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(ptr %buffer, 
float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:[[ALLOCA:%.*]] = freeze <3 x float> poison
-; CHECK-NEXT:[[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT:[[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 8
-; CHECK-NEXT:[[TMP1:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT:[[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float 
[[DATA]], i32 [[TMP1]]
-; CHECK-NEXT:store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
-; CHECK-NEXT:ret void
-;
-  %alloca = alloca <3 x float>, align 16, addrspace(5)
-  %vec = load <3 x float>, ptr %buffer
-  store <3 x float> %vec, ptr addrspace(5) %alloca
-  %index = select i1 %idx_sel, i32 4, i32 8
-  %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
-  store float %data, ptr addrspace(5) %elt, align 4
-  %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
-  store <3 x float> %updated, ptr %buffer, align 16
-  ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(ptr 
%buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void 
@scalar_alloca_nested_vector_gep_i8_4_or_8(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:[[ALLOCA:%.*]] = freeze <8 x float> poison

[llvm-branch-commits] [llvm] release/21.x: [WebAssembly] Remove FAKE_USEs before ExplicitLocals (#160768) (PR #171184)

2025-12-08 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/171184

Backport e5b2a06546eb20662156b8a59b77aca086301486

Requested by: @dschuff

>From 3c97f983e2a9b3fe1c8c9472bc40e05301f30179 Mon Sep 17 00:00:00 2001
From: Heejin Ahn 
Date: Thu, 25 Sep 2025 14:49:25 -0700
Subject: [PATCH] [WebAssembly] Remove FAKE_USEs before ExplicitLocals
 (#160768)

`FAKE_USE`s are essentially no-ops, so they have to be removed before
running ExplicitLocals so that `drop`s will be correctly inserted to
drop those values used by the `FAKE_USE`s.

---

This is reapplication of #160228, which broke Wasm waterfall. This PR
additionally prevents `FAKE_USE`s uses from being stackified.

Previously, a 'def' whose first use was a `FAKE_USE` was able to be
stackified as `TEE`:
- Before
```
Reg = INST ...// Def
FAKE_USE ..., Reg, ...// Insert
INST ..., Reg, ...
INST ..., Reg, ...
```

- After RegStackify
```
DefReg = INST ...// Def
TeeReg, Reg = TEE ... DefReg
FAKE_USE ..., TeeReg, ...// Insert
INST ..., Reg, ...
INST ..., Reg, ...
```
And this assumes `DefReg` and `TeeReg` are stackified.

But this PR removes `FAKE_USE`s at the beginning of ExplicitLocals, and
later in ExplicitLocals we have a routine to unstackify registers that
have no uses left:

https://github.com/llvm/llvm-project/blob/7b28fcd2b182ba2c9d2d71c386be92fc0ee3cc9d/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp#L257-L269
(This was added in #149626. At the time it did not seem that it would
trigger the same assertions for `TEE`s, because it was fixing the bug
where a terminator was removed in CFGSort (#149097).
Details here:
https://github.com/llvm/llvm-project/pull/149432#issuecomment-3091444141)
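
For reference, a condensed sketch of what that unstackification routine does
(assuming the pass's usual `MFI`/`MRI` handles; the linked source above is
authoritative):
```cpp
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
// Condensed paraphrase of the linked loop, not the verbatim routine.
// WebAssemblyFunctionInfo lives in the WebAssembly backend.
static void unstackifyDeadDefs(llvm::MachineFunction &MF,
                               llvm::WebAssemblyFunctionInfo &MFI) {
  llvm::MachineRegisterInfo &MRI = MF.getRegInfo();
  for (llvm::MachineBasicBlock &MBB : MF)
    for (llvm::MachineInstr &MI : MBB)
      for (llvm::MachineOperand &MO : MI.defs()) {
        llvm::Register Reg = MO.getReg();
        // After FAKE_USE removal, a stackified value may have no consumer
        // left; demote it so ExplicitLocals assigns a local (or a drop).
        if (Reg.isVirtual() && MFI.isVRegStackified(Reg) && MRI.use_empty(Reg))
          MFI.unstackifyVReg(Reg);
      }
}
```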

- After `FAKE_USE` removal and unstackification
```
DefReg = INST ...
TeeReg, Reg = TEE ... DefReg
INST ..., Reg, ...
INST ..., Reg, ...
```
And now `TeeReg` is unstackified. This triggered the assertion here,
that `TeeReg` should be stackified:

https://github.com/llvm/llvm-project/blob/7b28fcd2b182ba2c9d2d71c386be92fc0ee3cc9d/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp#L316

This PR prevents `FAKE_USE`s' uses from being stackified altogether, including
the `TEE` transformation. Even when it is not a `TEE` transformation but just a
single-use stackification, the assertion does not trigger, yet there is still
no point stackifying a use that will be deleted anyway.

---

Fixes https://github.com/emscripten-core/emscripten/issues/25301.

(cherry picked from commit e5b2a06546eb20662156b8a59b77aca086301486)
---
 .../WebAssembly/WebAssemblyExplicitLocals.cpp | 14 +++
 .../WebAssembly/WebAssemblyRegStackify.cpp|  4 +++
 llvm/test/CodeGen/WebAssembly/fake-use.ll | 25 +++
 3 files changed, 43 insertions(+)
 create mode 100644 llvm/test/CodeGen/WebAssembly/fake-use.ll

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index e6486e247209b..5c3127e2d3dc6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -216,6 +216,18 @@ static MachineInstr *findStartOfTree(MachineOperand &MO,
   return Def;
 }
 
+// FAKE_USEs are no-ops, so remove them here so that the values used by them
+// will be correctly dropped later.
+static void removeFakeUses(MachineFunction &MF) {
+  SmallVector<MachineInstr *> ToDelete;
+  for (auto &MBB : MF)
+for (auto &MI : MBB)
+  if (MI.isFakeUse())
+ToDelete.push_back(&MI);
+  for (auto *MI : ToDelete)
+MI->eraseFromParent();
+}
+
 bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "** Make Locals Explicit **\n"
"** Function: "
@@ -226,6 +238,8 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
   WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
 
+  removeFakeUses(MF);
+
   // Map non-stackified virtual registers to their local ids.
   DenseMap<unsigned, unsigned> Reg2Local;
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index bc91c6424b63e..fd13ef9a1921d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -866,6 +866,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
   if (Insert->isDebugValue())
 continue;
 
+  // Ignore FAKE_USEs, which are no-ops and will be deleted later.
+  if (Insert->isFakeUse())
+continue;
+
   // Iterate through the inputs in reverse order, since we'll be pulling
   // operands off the stack in LIFO order.
   CommutingState Commuting;
diff --git a/llvm/test/CodeGen/WebAssembly/fake-use.ll b/llvm/test/CodeGen/WebAssembly/fake-use.ll
new file mode 100644
index 0..a18ce33566df0
--- /dev/null
+++

[llvm-branch-commits] [llvm] release/21.x: [WebAssembly] Remove FAKE_USEs before ExplicitLocals (#160768) (PR #171184)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:

@dschuff What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/171184
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF][AArch64] Replace memtag hack with less-confusing code (PR #171182)

2025-12-08 Thread Jessica Clarke via llvm-branch-commits


@@ -741,7 +741,7 @@ static void addRelativeReloc(Ctx &ctx, InputSectionBase &isec,
   // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative
   if (sym.isTagged() && !isAArch64Auth &&
   (addend < 0 || static_cast(addend) >= sym.getSize()))
-isec.addReloc({expr, type, offsetInSec, addend, &sym});
+isec.addReloc({R_ADDEND_NEG, type, offsetInSec, addend, &sym});

jrtc27 wrote:

But I'm deferring that until
https://github.com/llvm/llvm-project/pull/171180/files#r2599706437 is resolved,
since this gets rather uglier if `!isAArch64Auth` needs to stay.

https://github.com/llvm/llvm-project/pull/171182
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF][AArch64] Replace memtag hack with less-confusing code (PR #171182)

2025-12-08 Thread Jessica Clarke via llvm-branch-commits


@@ -741,7 +741,7 @@ static void addRelativeReloc(Ctx &ctx, InputSectionBase &isec,
   // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative
   if (sym.isTagged() && !isAArch64Auth &&
   (addend < 0 || static_cast(addend) >= sym.getSize()))
-isec.addReloc({expr, type, offsetInSec, addend, &sym});
+isec.addReloc({R_ADDEND_NEG, type, offsetInSec, addend, &sym});

jrtc27 wrote:

I'd like to move this into RelocationBaseSection::addReloc itself, as is done
for Elf_Rel; otherwise any other place that calls
RelocationBaseSection::add(Relative)Reloc needs to know about this oddity. In
practice there are none, but the less "weird" all these various ABI extensions
are in terms of fitting into LLD's APIs, the better. Downstream in Morello LLD
we've similarly hidden the way R_MORELLO_RELATIVE works (it emits a "fragment"
that describes the capability base/length/permissions, and the dynamic
relocation's addend is the capability offset), so you "just" call
RelocationBaseSection::add(Relative)Reloc and all the magic for how to actually
do that happens for you. Following that for MTE globals would be a good idea
IMO.
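
A rough sketch of that shape, reusing names from the quoted diff (illustrative
only; this is not LLD's actual signature, and the memtag condition is
abbreviated):
```cpp
// Hypothetical sketch: the MTE-globals addend oddity lives behind the
// generic entry point, so callers never have to know about R_ADDEND_NEG.
void addRelativeRelocSketch(Ctx &ctx, InputSectionBase &isec, RelExpr expr,
                            RelType type, uint64_t offsetInSec, Symbol &sym,
                            int64_t addend) {
  if (sym.isTagged() &&
      (addend < 0 || static_cast<uint64_t>(addend) >= sym.getSize()))
    expr = R_ADDEND_NEG; // out-of-bounds addend on a tagged symbol
  isec.addReloc({expr, type, offsetInSec, addend, &sym});
}
```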

https://github.com/llvm/llvm-project/pull/171182
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)

2025-12-08 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite. Learn more: https://graphite.dev/docs/merge-pull-requests

* **#171186** 👈 (View in Graphite)
* **#171185**
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev).
Learn more about stacking: https://stacking.dev/


https://github.com/llvm/llvm-project/pull/171186
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)

2025-12-08 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/171186

We have special case handling for the logb builtins, so use them.
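
For reference, the builtin expands to the frexp-based sequence checked below;
a minimal C++ sketch of the same computation (illustrative, not the actual
header code):
```cpp
#include <cmath>

// Mirrors the lowering in the updated CHECK lines: extract the exponent via
// frexp, then patch up the +/-inf and +/-0 special cases with selects.
float logbf_sketch(float x) {
  int e;
  (void)std::frexp(x, &e);               // x = m * 2^e with 0.5 <= |m| < 1
  float exp = static_cast<float>(e - 1); // renormalize to 1 <= |m'| < 2
  float ax = std::fabs(x);
  // fcmp "one" is ordered-and-not-equal: NaN and +/-inf fall through to ax.
  float r = (!std::isnan(x) && ax != HUGE_VALF) ? exp : ax;
  if (x == 0.0f)
    r = -HUGE_VALF;                      // logb(+/-0) = -inf
  return r;
}
```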

>From 46c34ddcc57617d4c42839aedd53a96a18853581 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 8 Dec 2025 16:04:44 +0100
Subject: [PATCH] clang/HIP: Avoid using ocml logb

We have special case handling for the logb builtins, so use them.
---
 clang/lib/Headers/__clang_hip_math.h|   4 +-
 clang/test/Headers/__clang_hip_math.hip | 162 +---
 2 files changed, 118 insertions(+), 48 deletions(-)

diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 759e742c9d012..03c2721b4ad3c 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -498,7 +498,7 @@ __DEVICE__
 float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __builtin_log2f)(__x); }
 
 __DEVICE__
-float logbf(float __x) { return __ocml_logb_f32(__x); }
+float logbf(float __x) { return __builtin_logbf(__x); }
 
 __DEVICE__
 float logf(float __x) { return __FAST_OR_SLOW(__logf, __builtin_logf)(__x); }
@@ -901,7 +901,7 @@ __DEVICE__
 double log2(double __x) { return __ocml_log2_f64(__x); }
 
 __DEVICE__
-double logb(double __x) { return __ocml_logb_f64(__x); }
+double logb(double __x) { return __builtin_logb(__x); }
 
 __DEVICE__
 long int lrint(double __x) { return __builtin_rint(__x); }
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 4163666811c91..426e5af319cbf 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -3871,69 +3871,139 @@ extern "C" __device__ double test_log2(double x) {
   return log2(x);
 }
 
-// DEFAULT-LABEL: define dso_local noundef float @test_logbf(
-// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-LABEL: define dso_local float @test_logbf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
-// DEFAULT-NEXT:[[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR13]]
-// DEFAULT-NEXT:ret float [[CALL_I]]
-//
-// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float @test_logbf(
-// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT:[[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
+// DEFAULT-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// DEFAULT-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// DEFAULT-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// DEFAULT-NEXT:[[TMP4:%.*]] = tail call contract float @llvm.fabs.f32(float [[X]])
+// DEFAULT-NEXT:[[TMP5:%.*]] = fcmp contract one float [[TMP4]], 0x7FF0
+// DEFAULT-NEXT:[[TMP6:%.*]] = select contract i1 [[TMP5]], float [[TMP3]], float [[TMP4]]
+// DEFAULT-NEXT:[[TMP7:%.*]] = fcmp contract oeq float [[X]], 0.00e+00
+// DEFAULT-NEXT:[[TMP8:%.*]] = select contract i1 [[TMP7]], float 0xFFF0, float [[TMP6]]
+// DEFAULT-NEXT:ret float [[TMP8]]
+//
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_logbf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
-// FINITEONLY-NEXT:[[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR13]]
-// FINITEONLY-NEXT:ret float [[CALL_I]]
+// FINITEONLY-NEXT:[[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float nofpclass(nan inf) [[X]])
+// FINITEONLY-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// FINITEONLY-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// FINITEONLY-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// FINITEONLY-NEXT:ret float [[TMP3]]
 //
-// APPROX-LABEL: define dso_local noundef float @test_logbf(
-// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-LABEL: define dso_local float @test_logbf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
-// APPROX-NEXT:[[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X]]) #[[ATTR13]]
-// APPROX-NEXT:ret float [[CALL_I]]
-//
-// NCRDIV-LABEL: define dso_local noundef float @test_logbf(
-// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT:[[TMP0:%.*]] = tail call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
+// APPROX-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// APPROX-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// APPROX-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// APPROX-NEXT:[[TMP4:%.*]] = tail call contract float @llvm.fabs.f32(float [[X]])

[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)

2025-12-08 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/171186
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC] [FlowSensitive] Fix missing namespace in MockHeaders (PR #170954)

2025-12-08 Thread Jan Voung via llvm-branch-commits

https://github.com/jvoung approved this pull request.

Nice catch!

https://github.com/llvm/llvm-project/pull/170954
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Add implicit tracking for STL functions (PR #170005)

2025-12-08 Thread Utkarsh Saxena via llvm-branch-commits

https://github.com/usx95 updated 
https://github.com/llvm/llvm-project/pull/170005

>From 5be82f0e9576d5d1c5c74d5935b6c519bd63a9ff Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena 
Date: Sat, 29 Nov 2025 15:07:40 +
Subject: [PATCH] Implicit lifetimebound for std namespace

---
 .../LifetimeSafety/LifetimeAnnotations.h  |  14 ++
 .../LifetimeSafety/FactsGenerator.cpp |   6 +-
 .../LifetimeSafety/LifetimeAnnotations.cpp|  82 
 clang/lib/Analysis/LifetimeSafety/Origins.cpp |   4 +
 clang/lib/Sema/CheckExprLifetime.cpp  |  64 +--
 .../unittests/Analysis/LifetimeSafetyTest.cpp | 180 ++
 6 files changed, 287 insertions(+), 63 deletions(-)

diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h
index 1a16fb82f9a84..8e26a4d41a957 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h
@@ -38,6 +38,20 @@ bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD);
 /// method or because it's a normal assignment operator.
 bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD);
 
+// Returns true if the implicit object argument (this) of a method call should
+// be tracked for GSL lifetime analysis. This applies to STL methods that return
+// pointers or references that depend on the lifetime of the object, such as
+// container iterators (begin, end), data accessors (c_str, data, get), or
+// element accessors (operator[], operator*, front, back, at).
+bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee);
+
+// Returns true if the first argument of a free function should be tracked for
+// GSL lifetime analysis. This applies to STL free functions that take a pointer
+// to a GSL Owner or Pointer and return a pointer or reference that depends on
+// the lifetime of the argument, such as std::begin, std::data, std::get, or
+// std::any_cast.
+bool shouldTrackFirstArgument(const FunctionDecl *FD);
+
 // Tells whether the type is annotated with [[gsl::Pointer]].
 bool isGslPointerType(QualType QT);
 // Tells whether the type is annotated with [[gsl::Owner]].
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index c897a5fcd718b..eb219e2f7f334 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -15,6 +15,7 @@
 #include "clang/Analysis/Analyses/LifetimeSafety/Origins.h"
 #include "clang/Analysis/Analyses/PostOrderCFGView.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/TimeProfiler.h"
 
@@ -410,11 +411,14 @@ void FactsGenerator::handleFunctionCall(const Expr *Call,
 Method && Method->isInstance()) {
   if (I == 0)
 // For the 'this' argument, the attribute is on the method itself.
-return implicitObjectParamIsLifetimeBound(Method);
+return implicitObjectParamIsLifetimeBound(Method) ||
+   shouldTrackImplicitObjectArg(Method);
   if ((I - 1) < Method->getNumParams())
 // For explicit arguments, find the corresponding parameter
 // declaration.
 PVD = Method->getParamDecl(I - 1);
+} else if (I == 0 && shouldTrackFirstArgument(FD)) {
+  return true;
 } else if (I < FD->getNumParams()) {
   // For free functions or static methods.
   PVD = FD->getParamDecl(I);
diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp
index 54e343fc2ee5e..860aa5373a32c 100644
--- a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp
@@ -71,6 +71,88 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) {
   return isNormalAssignmentOperator(FD);
 }
 
+// Decl::isInStdNamespace will return false for iterators in some STL
+// implementations due to them being defined in a namespace outside of the std
+// namespace.
+static bool isInStlNamespace(const Decl *D) {
+  const DeclContext *DC = D->getDeclContext();
+  if (!DC)
+return false;
+  if (const auto *ND = dyn_cast<NamespaceDecl>(DC))
+if (const IdentifierInfo *II = ND->getIdentifier()) {
+  StringRef Name = II->getName();
+  if (Name.size() >= 2 && Name.front() == '_' &&
+  (Name[1] == '_' || isUppercase(Name[1])))
+return true;
+}
+
+  return DC->isStdNamespace();
+}
+
+static bool isPointerLikeType(QualType QT) {
+  return isGslPointerType(QT) || QT->isPointerType() || QT->isNullPtrType();
+}
+
+bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
+  if (auto *Conv = dyn_cast_or_null<CXXConversionDecl>(Callee))
+if (isGslPointerType(Conv->getConversionType()) &&
+Callee->getParent()->hasAttr(

[llvm-branch-commits] [clang] [LifetimeSafety] Track moved declarations to prevent false positives (PR #170007)

2025-12-08 Thread Utkarsh Saxena via llvm-branch-commits

https://github.com/usx95 updated 
https://github.com/llvm/llvm-project/pull/170007

>From 444baf2e0a43002cdcc8f516cd94deab30c3be57 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena 
Date: Sat, 29 Nov 2025 16:43:06 +
Subject: [PATCH] std_move false positive

---
 .../Analyses/LifetimeSafety/FactsGenerator.h  |  5 
 .../LifetimeSafety/FactsGenerator.cpp | 23 +++
 clang/test/Sema/warn-lifetime-safety.cpp  | 18 +++
 3 files changed, 46 insertions(+)

diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
index ffe9101606a97..ac97991342b86 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
@@ -102,6 +102,11 @@ class FactsGenerator : public ConstStmtVisitor<FactsGenerator> {
   // corresponding to the left-hand side is updated to be a "write", thereby
   // exempting it from the check.
   llvm::DenseMap UseFacts;
+
+  // Tracks declarations that have been moved via std::move. This is used to
+  // prevent false positives when the original owner is destroyed after the
+  // value has been moved. This tracking is flow-insensitive.
+  llvm::DenseSet MovedDecls;
 };
 
 } // namespace clang::lifetimes::internal
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index 3ff817de0d18a..bf617261cc23b 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -183,9 +183,27 @@ void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) {
   }
 }
 
+static bool isStdMove(const FunctionDecl *FD) {
+  return FD && FD->isInStdNamespace() && FD->getIdentifier() &&
+ FD->getName() == "move";
+}
+
 void FactsGenerator::VisitCallExpr(const CallExpr *CE) {
   handleFunctionCall(CE, CE->getDirectCallee(),
  {CE->getArgs(), CE->getNumArgs()});
+  // Track declarations that are moved via std::move.
+  // This is a flow-insensitive approximation: once a declaration is moved
+  // anywhere in the function, it's treated as moved everywhere. This can lead
+  // to false negatives on control flow paths where the value is not actually
+  // moved, but these are considered lower priority than the false positives
+  // this tracking prevents.
+  // TODO: The ideal solution would be flow-sensitive ownership tracking that
+  // records where values are moved from and to, but this is more complex.
+  if (isStdMove(CE->getDirectCallee()))
+if (CE->getNumArgs() == 1)
+  if (auto *DRE =
+  dyn_cast<DeclRefExpr>(CE->getArg(0)->IgnoreParenImpCasts()))
+MovedDecls.insert(DRE->getDecl());
 }
 
 void FactsGenerator::VisitCXXNullPtrLiteralExpr(
@@ -364,6 +382,11 @@ void FactsGenerator::handleLifetimeEnds(const CFGLifetimeEnds &LifetimeEnds) {
   // Iterate through all loans to see if any expire.
   for (const auto *Loan : FactMgr.getLoanMgr().getLoans()) {
 if (const auto *BL = dyn_cast(Loan)) {
+  // Skip loans for declarations that have been moved. When a value is
+  // moved, the original owner no longer has ownership and its destruction
+  // should not cause the loan to expire, preventing false positives.
+  if (MovedDecls.contains(BL->getAccessPath().D))
+continue;
   // Check if the loan is for a stack variable and if that variable
   // is the one being destructed.
   if (BL->getAccessPath().D == LifetimeEndsVD)
diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp
index f22c73cfeb784..97a79cc4ce102 100644
--- a/clang/test/Sema/warn-lifetime-safety.cpp
+++ b/clang/test/Sema/warn-lifetime-safety.cpp
@@ -1,9 +1,14 @@
 // RUN: %clang_cc1 -fsyntax-only -fexperimental-lifetime-safety -Wexperimental-lifetime-safety -Wno-dangling -verify %s
 
+#include "Inputs/lifetime-analysis.h"
+
 struct View;
 
 struct [[gsl::Owner]] MyObj {
   int id;
+  MyObj();
+  MyObj(int);
+  MyObj(const MyObj&);
   ~MyObj() {}  // Non-trivial destructor
   MyObj operator+(MyObj);
   
@@ -1297,3 +1302,16 @@ void add(int c, MyObj* node) {
   arr[4] = node;
 }
 } // namespace CppCoverage
+
+namespace do_not_warn_on_std_move {
+void silenced() {
+  MyObj b;
+  View v;
+  {
+MyObj a;
+v = a;
+b = std::move(a); // No warning for 'a' being moved.
+  }
+  (void)v;
+}
+} // namespace do_not_warn_on_std_move

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Add origin tracking for pointer dereference (PR #170006)

2025-12-08 Thread Utkarsh Saxena via llvm-branch-commits

https://github.com/usx95 updated 
https://github.com/llvm/llvm-project/pull/170006

>From 9308d55447964a7e52d252a7273bbed1a4fd6832 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena 
Date: Sat, 29 Nov 2025 15:49:00 +
Subject: [PATCH] dereference_operator

---
 .../LifetimeSafety/FactsGenerator.cpp |  4 
 .../Sema/warn-lifetime-safety-dataflow.cpp|  6 +
 .../Sema/warn-lifetime-safety-suggestions.cpp |  9 +---
 clang/test/Sema/warn-lifetime-safety.cpp  | 22 +--
 4 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index eb219e2f7f334..3ff817de0d18a 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -239,6 +239,10 @@ void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) {
 // origin of this UnaryOperator expression.
 killAndFlowOrigin(*UO, *SubExpr);
   }
+  if (UO->getOpcode() == UO_Deref) {
+const Expr *SubExpr = UO->getSubExpr();
+killAndFlowOrigin(*UO, *SubExpr);
+  }
 }
 
 void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) {
diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
index 6d5711deba1cf..6fc7c776f935c 100644
--- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
+++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
@@ -152,6 +152,12 @@ void pointer_indirection() {
 // CHECK-NEXT:   Dest: {{[0-9]+}} (Expr: ImplicitCastExpr, Type : int *)
 // CHECK-NEXT:   Src:  [[O_PP_INNER]] (Decl: pp, Type : int *)
 // CHECK:   OriginFlow:
+// CHECK-NEXT:   Dest: {{[0-9]+}} (Expr: UnaryOperator, Type : int *&)
+// CHECK-NEXT:   Src:  {{[0-9]+}} (Expr: ImplicitCastExpr, Type : int **)
+// CHECK:   OriginFlow:
+// CHECK-NEXT:   Dest: {{[0-9]+}} (Expr: UnaryOperator, Type : int *)
+// CHECK-NEXT:   Src:  {{[0-9]+}} (Expr: ImplicitCastExpr, Type : int *)
+// CHECK:   OriginFlow:
 // CHECK-NEXT:   Dest: {{[0-9]+}} (Expr: ImplicitCastExpr, Type : int *)
 // CHECK-NEXT:   Src:  {{[0-9]+}} (Expr: UnaryOperator, Type : int *)
 // CHECK:   OriginFlow:
diff --git a/clang/test/Sema/warn-lifetime-safety-suggestions.cpp b/clang/test/Sema/warn-lifetime-safety-suggestions.cpp
index 280f774b4664c..abc92dd86cbde 100644
--- a/clang/test/Sema/warn-lifetime-safety-suggestions.cpp
+++ b/clang/test/Sema/warn-lifetime-safety-suggestions.cpp
@@ -49,9 +49,12 @@ MyObj* return_ptr_to_ref(MyObj& a) { // expected-warning {{param should be marke
   return &a; // expected-note {{param returned here}}
 }
 
-// FIXME: Dereference does not propagate loans.
-MyObj& return_ref_to_ptr(MyObj* a) {
-  return *a;
+MyObj& return_ref_to_ptr(MyObj* a) {  // expected-warning {{param should be marked [[clang::lifetimebound]]}}
+  return *a;  // expected-note {{param returned here}}
+}
+
+View return_ref_to_ptr_multiple(MyObj* a) {  // expected-warning {{param should be marked [[clang::lifetimebound]]}}
+  return *(&(*(&(*a))));  // expected-note {{param returned here}}
 }
 
 View return_view_from_reference(MyObj& p) {  // expected-warning {{param should be marked [[clang::lifetimebound]]}}
diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp
index e62c3b69b040b..f22c73cfeb784 100644
--- a/clang/test/Sema/warn-lifetime-safety.cpp
+++ b/clang/test/Sema/warn-lifetime-safety.cpp
@@ -596,10 +596,10 @@ const int* return_pointer_to_parameter_via_reference(int a, int b, bool cond) {
 const int* d = &c;
 return d; // expected-note 2 {{returned here}}
 }
-// FIXME: Dereference of a pointer does not track the reference.
+
 const int& return_pointer_to_parameter_via_reference_1(int a) {
-const int* d = &a;
-return *d;
+const int* d = &a; // expected-warning {{address of stack memory is returned later}}
+return *d;// expected-note {{returned here}}
 }
 
 const int& get_ref_to_local() {
@@ -1118,24 +1118,24 @@ struct MyObjStorage {
   const MyObj *end() const { return objs + 1; }
 };
 
-// FIXME: Detect use-after-scope. Dereference pointer does not propagate the origins.
 void range_based_for_use_after_scope() {
   View v;
   {
 MyObjStorage s;
-for (const MyObj &o : s) {
+for (const MyObj &o : s) { // expected-warning {{object whose reference is captured does not live long enough}}
   v = o;
 }
-  }
-  v.use();
+  } // expected-note {{destroyed here}}
+  v.use(); // expected-note {{later used here}}
 }
-// FIXME: Detect use-after-return. Dereference pointer does not propagate the origins.
+
 View range_based_for_use_after_return() {
   MyObjStorage s;
-  for (const MyObj &o : s) {
-return o;
+  for (const MyObj &o : s) { // expected-warning {{address of stack memory is returned later}}
+return o;  // expected-note {{returned here}}
   }
-  return *s.begin();
+  return *

[llvm-branch-commits] [mlir] [mlir][py] partially use mlir_type_subclass for IRTypes.cpp (PR #171143)

2025-12-08 Thread Oleksandr Alex Zinenko via llvm-branch-commits

https://github.com/ftynse created 
https://github.com/llvm/llvm-project/pull/171143

Port the bindings for non-shaped builtin types in IRTypes.cpp to use the 
`mlir_type_subclass` mechanism used by non-builtin types. This is part of a 
longer-term cleanup to only support one subclassing mechanism. Eventually, the 
`PyConcreteType` mechanism will be removed.

This required some surgery in the type casters and the `mlir_type_subclass`
logic to avoid the circular imports of the `_mlir.ir` module that would
otherwise occur when using `mlir_type_subclass` to define classes in the
`_mlir.ir` module.

Tests are updated to use the `.get_static_typeid()` function instead of the 
`.static_typeid` property that was specific to builtin types due to the 
`PyConcreteType` mechanism. The change should be NFC otherwise.
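
For context, a minimal sketch of defining a type through `mlir_type_subclass`
in an out-of-tree extension; `MyType` and its C-API hooks are made up for
illustration, while the adaptor machinery itself is the real one from
NanobindAdaptors.h:
```cpp
#include "mlir/Bindings/Python/NanobindAdaptors.h"

namespace nb = nanobind;
using namespace mlir::python::nanobind_adaptors;

// Hypothetical C-API hooks for the made-up "MyType".
extern "C" bool mlirTypeIsAMyType(MlirType type);
extern "C" MlirType mlirMyTypeGet(MlirContext ctx);
extern "C" MlirTypeID mlirMyTypeGetTypeID();

NB_MODULE(_myDialect, m) {
  mlir_type_subclass(m, "MyType", mlirTypeIsAMyType, mlirMyTypeGetTypeID)
      .def_classmethod(
          "get",
          [](const nb::object &cls, MlirContext ctx) {
            // Construct the Python subclass wrapping the C-API type.
            return cls(mlirMyTypeGet(ctx));
          },
          nb::arg("cls"), nb::arg("context"));
}
```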

>From deac26450350ba40b9f9357f68ec3a5e458b43d6 Mon Sep 17 00:00:00 2001
From: Alex Zinenko 
Date: Mon, 8 Dec 2025 15:50:41 +0100
Subject: [PATCH] [mlir][py] partially use mlir_type_subclass for IRTypes.cpp

Port the bindings for non-shaped builtin types in IRTypes.cpp to use the
`mlir_type_subclass` mechanism used by non-builtin types. This is part of a
longer-term cleanup to only support one subclassing mechanism. Eventually, the
`PyConcreteType` mechanism will be removed.

This required some surgery in the type casters and the `mlir_type_subclass`
logic to avoid the circular imports of the `_mlir.ir` module that would
otherwise occur when using `mlir_type_subclass` to define classes in the
`_mlir.ir` module.

Tests are updated to use the `.get_static_typeid()` function instead of the
`.static_typeid` property that was specific to builtin types due to the
`PyConcreteType` mechanism. The change should be NFC otherwise.
---
 .../mlir/Bindings/Python/NanobindAdaptors.h   |   41 +-
 mlir/lib/Bindings/Python/IRTypes.cpp  | 1029 ++---
 mlir/lib/Bindings/Python/MainModule.cpp   |   15 +
 mlir/test/python/dialects/arith_dialect.py|8 +-
 mlir/test/python/ir/builtin_types.py  |   11 +-
 mlir/test/python/ir/value.py  |6 +-
 6 files changed, 425 insertions(+), 685 deletions(-)

diff --git a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h 
b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
index 6594670abaaa7..f678f57527e97 100644
--- a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
+++ b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
@@ -371,16 +371,22 @@ struct type_caster<MlirTypeID> {
 }
 return false;
   }
-  static handle from_cpp(MlirTypeID v, rv_policy,
- cleanup_list *cleanup) noexcept {
+
+  static handle
+  from_cpp_given_module(MlirTypeID v,
+const nanobind::module_ &module) noexcept {
 if (v.ptr == nullptr)
   return nanobind::none();
 nanobind::object capsule =
 nanobind::steal(mlirPythonTypeIDToCapsule(v));
-return mlir::python::irModule()
-.attr("TypeID")
+return module.attr("TypeID")
 .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
 .release();
+  }
+
+  static handle from_cpp(MlirTypeID v, rv_policy,
+ cleanup_list *cleanup) noexcept {
+return from_cpp_given_module(v, mlir::python::irModule());
   };
 };
 
@@ -602,9 +608,12 @@ class mlir_type_subclass : public pure_subclass {
   /// Subclasses by looking up the super-class dynamically.
   mlir_type_subclass(nanobind::handle scope, const char *typeClassName,
  IsAFunctionTy isaFunction,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
-  : mlir_type_subclass(scope, typeClassName, isaFunction,
-   irModule().attr("Type"), getTypeIDFunction) {}
+ GetTypeIDFunctionTy getTypeIDFunction = nullptr,
+ const nanobind::module_ *mlirIrModule = nullptr)
+  : mlir_type_subclass(
+scope, typeClassName, isaFunction,
+(mlirIrModule != nullptr ? *mlirIrModule : 
irModule()).attr("Type"),
+getTypeIDFunction, mlirIrModule) {}
 
   /// Subclasses with a provided mlir.ir.Type super-class. This must
   /// be used if the subclass is being defined in the same extension module
@@ -613,7 +622,8 @@ class mlir_type_subclass : public pure_subclass {
   mlir_type_subclass(nanobind::handle scope, const char *typeClassName,
  IsAFunctionTy isaFunction,
  const nanobind::object &superCls,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
+ GetTypeIDFunctionTy getTypeIDFunction = nullptr,
+ const nanobind::module_ *mlirIrModule = nullptr)
   : pure_subclass(scope, typeClassName, superCls) {
 // Casting constructor. Note that it is hard, if not impossible, to properly
 // call chain to parent `__init__` in nanobind due to its special handling
@@ -672,9 +682,18 @@ class mlir_type_subclass : public pure_subclass {
   nanobind::sig("def get_s

[llvm-branch-commits] [mlir] [mlir][py] partially use mlir_type_subclass for IRTypes.cpp (PR #171143)

2025-12-08 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: Python code formatter, darker found issues in your code. :warning:



You can test this locally with the following command:


```bash
darker --check --diff -r origin/main...HEAD mlir/test/python/dialects/arith_dialect.py mlir/test/python/ir/builtin_types.py mlir/test/python/ir/value.py
```

:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:





View the diff from darker here.


```diff
--- ir/builtin_types.py 2025-12-08 14:50:41.00 +
+++ ir/builtin_types.py 2025-12-08 14:58:38.795311 +
@@ -713,11 +713,16 @@
 # Test getTypeIdFunction agrees with
 # mlirTypeGetTypeID(self) for an instance.
 # CHECK: all equal
 for t1, t2 in types:
 # TODO: remove the alternative once mlir_type_subclass transition 
is complete.
-tid1, tid2 = t1.static_typeid if hasattr(t1, "static_typeid") else 
t1.get_static_typeid(), Type(t2).typeid
+tid1, tid2 = (
+t1.static_typeid
+if hasattr(t1, "static_typeid")
+else t1.get_static_typeid(),
+Type(t2).typeid,
+)
 assert tid1 == tid2 and hash(tid1) == hash(
 tid2
 ), f"expected hash and value equality {t1} {t2}"
 else:
 print("all equal")
@@ -728,11 +733,15 @@
 assert len(typeid_dict)
 
 # CHECK: all equal
 for t1, t2 in typeid_dict.items():
 # TODO: remove the alternative once mlir_type_subclass transition 
is complete.
-tid1 = t1.static_typeid if hasattr(t1, "static_typeid") else 
t1.get_static_typeid()
+tid1 = (
+t1.static_typeid
+if hasattr(t1, "static_typeid")
+else t1.get_static_typeid()
+)
 assert tid1 == t2.typeid and hash(tid1) == hash(
 t2.typeid
 ), f"expected hash and value equality {t1} {t2}"
 else:
 print("all equal")

```




https://github.com/llvm/llvm-project/pull/171143
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][py] partially use mlir_type_subclass for IRTypes.cpp (PR #171143)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Oleksandr "Alex" Zinenko (ftynse)


Changes

Port the bindings for non-shaped builtin types in IRTypes.cpp to use the 
`mlir_type_subclass` mechanism used by non-builtin types. This is part of a 
longer-term cleanup to only support one subclassing mechanism. Eventually, the 
`PyConcreteType` mechanism will be removed.

This required some surgery in the type casters and the `mlir_type_subclass`
logic to avoid the circular imports of the `_mlir.ir` module that would
otherwise occur when using `mlir_type_subclass` to define classes in the
`_mlir.ir` module.

Tests are updated to use the `.get_static_typeid()` function instead of the 
`.static_typeid` property that was specific to builtin types due to the 
`PyConcreteType` mechanism. The change should be NFC otherwise.

---

Patch is 49.00 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/171143.diff


6 Files Affected:

- (modified) mlir/include/mlir/Bindings/Python/NanobindAdaptors.h (+30-11) 
- (modified) mlir/lib/Bindings/Python/IRTypes.cpp (+366-663) 
- (modified) mlir/lib/Bindings/Python/MainModule.cpp (+15) 
- (modified) mlir/test/python/dialects/arith_dialect.py (+4-4) 
- (modified) mlir/test/python/ir/builtin_types.py (+7-4) 
- (modified) mlir/test/python/ir/value.py (+3-3) 


```diff
diff --git a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
index 6594670abaaa7..f678f57527e97 100644
--- a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
+++ b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
@@ -371,16 +371,22 @@ struct type_caster<MlirTypeID> {
 }
 return false;
   }
-  static handle from_cpp(MlirTypeID v, rv_policy,
- cleanup_list *cleanup) noexcept {
+
+  static handle
+  from_cpp_given_module(MlirTypeID v,
+const nanobind::module_ &module) noexcept {
 if (v.ptr == nullptr)
   return nanobind::none();
 nanobind::object capsule =
 nanobind::steal(mlirPythonTypeIDToCapsule(v));
-return mlir::python::irModule()
-.attr("TypeID")
+return module.attr("TypeID")
 .attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
 .release();
+  }
+
+  static handle from_cpp(MlirTypeID v, rv_policy,
+ cleanup_list *cleanup) noexcept {
+return from_cpp_given_module(v, mlir::python::irModule());
   };
 };
 
@@ -602,9 +608,12 @@ class mlir_type_subclass : public pure_subclass {
   /// Subclasses by looking up the super-class dynamically.
   mlir_type_subclass(nanobind::handle scope, const char *typeClassName,
  IsAFunctionTy isaFunction,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
-  : mlir_type_subclass(scope, typeClassName, isaFunction,
-   irModule().attr("Type"), getTypeIDFunction) {}
+ GetTypeIDFunctionTy getTypeIDFunction = nullptr,
+ const nanobind::module_ *mlirIrModule = nullptr)
+  : mlir_type_subclass(
+scope, typeClassName, isaFunction,
+(mlirIrModule != nullptr ? *mlirIrModule : irModule()).attr("Type"),
+getTypeIDFunction, mlirIrModule) {}
 
   /// Subclasses with a provided mlir.ir.Type super-class. This must
   /// be used if the subclass is being defined in the same extension module
@@ -613,7 +622,8 @@ class mlir_type_subclass : public pure_subclass {
   mlir_type_subclass(nanobind::handle scope, const char *typeClassName,
  IsAFunctionTy isaFunction,
  const nanobind::object &superCls,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
+ GetTypeIDFunctionTy getTypeIDFunction = nullptr,
+ const nanobind::module_ *mlirIrModule = nullptr)
   : pure_subclass(scope, typeClassName, superCls) {
 // Casting constructor. Note that it is hard, if not impossible, to properly
 // call chain to parent `__init__` in nanobind due to its special handling
@@ -672,9 +682,18 @@ class mlir_type_subclass : public pure_subclass {
   nanobind::sig("def get_static_typeid() -> " MAKE_MLIR_PYTHON_QUALNAME("ir.TypeID"))
   // clang-format on
   );
-  nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
-  .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)(
-  getTypeIDFunction())(nanobind::cpp_function(
+
+  // Directly call the caster implementation given the "ir" module,
+  // otherwise it may trigger recursive import as the default caster
+  // attempts to import the "ir" module.
+  MlirTypeID typeID = getTypeIDFunction();
+  mlirIrModule = mlirIrModule ? mlirIrModule : &irModule();
+  nanobind::handle pyTypeID =
+  nanobind::detail::type_caster::from_cpp_given_module(
+  typeID, *mlirIrModule);
+
+  mlirIrModule->attr(MLIR_PYTHON_CAPI_TYPE_CA

[llvm-branch-commits] [llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)

2025-12-08 Thread Gergely Bálint via llvm-branch-commits


@@ -0,0 +1,46 @@
+# This test checks that BOLT can generate BTI landing pads for targets of stubs inserted in LongJmp.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %s %cflags -Wl,-q -o %t -mbranch-protection=bti -Wl,-z,force-bti
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions \
+# RUN: --print-split --print-only foo --print-longjmp 2>&1 | FileCheck %s
+
+#CHECK: BOLT-INFO: Starting stub-insertion pass
+#CHECK: Binary Function "foo" after long-jmp
+
+#CHECK:  cmp x0, #0x0
+#CHECK-NEXT: Successors: .LStub0
+
+#CHECK:  adrpx16, .Ltmp0
+#CHECK-NEXT: add x16, x16, :lo12:.Ltmp0
+#CHECK-NEXT: br  x16 # UNKNOWN CONTROL FLOW
+
+#CHECK: ---   HOT-COLD SPLIT POINT   ---
+
+#CHECK:  bti c
+#CHECK-NEXT: mov x0, #0x2
+#CHECK-NEXT: ret

bgergely0 wrote:

this is about the space between # and CHECK, right?

https://github.com/llvm/llvm-project/pull/171149
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [WebAssembly] Remove FAKE_USEs before ExplicitLocals (#160768) (PR #171184)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-webassembly

Author: None (llvmbot)


Changes

Backport e5b2a06546eb20662156b8a59b77aca086301486

Requested by: @dschuff

---
Full diff: https://github.com/llvm/llvm-project/pull/171184.diff


3 Files Affected:

- (modified) llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp (+14) 
- (modified) llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp (+4) 
- (added) llvm/test/CodeGen/WebAssembly/fake-use.ll (+25) 


```diff
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index e6486e247209b..5c3127e2d3dc6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -216,6 +216,18 @@ static MachineInstr *findStartOfTree(MachineOperand &MO,
   return Def;
 }
 
+// FAKE_USEs are no-ops, so remove them here so that the values used by them
+// will be correctly dropped later.
+static void removeFakeUses(MachineFunction &MF) {
+  SmallVector<MachineInstr *> ToDelete;
+  for (auto &MBB : MF)
+for (auto &MI : MBB)
+  if (MI.isFakeUse())
+ToDelete.push_back(&MI);
+  for (auto *MI : ToDelete)
+MI->eraseFromParent();
+}
+
 bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "** Make Locals Explicit **\n"
"** Function: "
@@ -226,6 +238,8 @@ bool 
WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
   WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
 
+  removeFakeUses(MF);
+
   // Map non-stackified virtual registers to their local ids.
   DenseMap<unsigned, unsigned> Reg2Local;
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp 
b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index bc91c6424b63e..fd13ef9a1921d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -866,6 +866,10 @@ bool 
WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
   if (Insert->isDebugValue())
 continue;
 
+  // Ignore FAKE_USEs, which are no-ops and will be deleted later.
+  if (Insert->isFakeUse())
+continue;
+
   // Iterate through the inputs in reverse order, since we'll be pulling
   // operands off the stack in LIFO order.
   CommutingState Commuting;
diff --git a/llvm/test/CodeGen/WebAssembly/fake-use.ll 
b/llvm/test/CodeGen/WebAssembly/fake-use.ll
new file mode 100644
index 0..a18ce33566df0
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/fake-use.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s | llvm-mc -triple=wasm32-unknown-unknown
+
+target triple = "wasm32-unknown-unknown"
+
+define void @fake_use() {
+  %t = call i32 @foo()
+  tail call void (...) @llvm.fake.use(i32 %t)
+  ret void
+}
+
+; %t shouldn't be converted to TEE in RegStackify, because the FAKE_USE will be
+; deleted in the beginning of ExplicitLocals.
+define void @fake_use_no_tee() {
+  %t = call i32 @foo()
+  tail call void (...) @llvm.fake.use(i32 %t)
+  call void @use(i32 %t)
+  ret void
+}
+
+declare i32 @foo()
+declare void @use(i32 %t)
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn 
memory(inaccessiblemem: readwrite)
+declare void @llvm.fake.use(...) #0
+
+attributes #0 = { mustprogress nocallback nofree nosync nounwind willreturn 
memory(inaccessiblemem: readwrite) }

``
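
Aside (not part of the backport): removeFakeUses above follows the usual
collect-then-erase idiom, since deleting instructions while still iterating
the blocks would invalidate the walk. A minimal stand-alone analogue, with a
std::list standing in for the machine instruction list:

```cpp
#include <list>
#include <vector>

// Collect matching elements first, then erase them, so iteration is never
// invalidated mid-walk. The predicate stands in for MI.isFakeUse(); the
// erase() call stands in for MI->eraseFromParent().
void eraseFakeUses(std::list<int> &instrs) {
  std::vector<std::list<int>::iterator> toDelete;
  for (auto it = instrs.begin(); it != instrs.end(); ++it)
    if (*it == 0) // stand-in predicate for "is a FAKE_USE"
      toDelete.push_back(it);
  for (auto it : toDelete)
    instrs.erase(it); // list iterators stay valid across other erasures
}
```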




https://github.com/llvm/llvm-project/pull/171184
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Use different name scope for MIMGEncoding and MIMGDim (PR #171166)

2025-12-08 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/171166
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][AArch64] Replace R_AARCH64_AUTH_ABS64 addend hack (PR #171192)

2025-12-08 Thread Jessica Clarke via llvm-branch-commits


@@ -585,7 +585,7 @@ struct RelativeReloc {
 return inputSec->getVA(inputSec->relocs()[relocIdx].offset);
   }
 
-  const InputSectionBase *inputSec;
+  InputSectionBase *inputSec;

jrtc27 wrote:

We could const_cast in finalizeAddressDependentContent instead, whichever's
deemed better for this case (note that *inputSec is const in DynamicReloc, so
removing the const here creates a disparity).
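
For concreteness, a self-contained sketch of that alternative (the types here
are stand-ins, not lld's, and the snippet is untested):

```cpp
#include <cstddef>
#include <vector>

struct Relocation { int expr; };
struct InputSectionBase {
  std::vector<Relocation> rels;
  const std::vector<Relocation> &relocs() const { return rels; }
};
struct RelativeReloc {
  const InputSectionBase *inputSec; // member stays const-qualified
  size_t relocIdx;
};

// Keep the member const and const_cast at the one mutation site instead.
void markDemoted(const RelativeReloc &elem) {
  auto &reloc =
      const_cast<Relocation &>(elem.inputSec->relocs()[elem.relocIdx]);
  reloc.expr = 0; // stand-in for assigning R_NONE
}
```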

https://github.com/llvm/llvm-project/pull/171192
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][AArch64] Replace R_AARCH64_AUTH_ABS64 addend hack (PR #171192)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lld

Author: Jessica Clarke (jrtc27)


Changes

Rather than trying to infer deep down in AArch64::relocate whether we
actually need to write anything, we should instead mark the relocations we
no longer want so that we don't apply them. This is similar to how
X86_64::deleteFallThruJmpInsn works, although since the target is still
valid we don't need to mess with the offset, just the expr.
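
A minimal sketch of this mark-then-skip shape, using hypothetical stand-in
types rather than lld's API:

```cpp
#include <cstdint>
#include <vector>

enum RelExpr { R_NONE, R_ABS }; // stand-ins for lld's relocation exprs

struct Relocation {
  RelExpr expr;
  uint64_t offset;
  int64_t addend;
};

// An earlier pass decides which entries should no longer be applied and
// marks them, instead of re-deriving that decision at apply time...
void demoteWideAddends(std::vector<Relocation> &relocs) {
  for (Relocation &r : relocs)
    if (r.addend > INT32_MAX || r.addend < INT32_MIN) // stand-in condition
      r.expr = R_NONE;
}

// ...and the apply loop simply skips marked entries.
void applyAll(const std::vector<Relocation> &relocs, uint8_t *buf) {
  for (const Relocation &r : relocs) {
    if (r.expr == R_NONE)
      continue; // see demoteWideAddends()
    buf[r.offset] = static_cast<uint8_t>(r.addend); // placeholder write
  }
}
```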


---
Full diff: https://github.com/llvm/llvm-project/pull/171192.diff


3 Files Affected:

- (modified) lld/ELF/Arch/AArch64.cpp (+7-11) 
- (modified) lld/ELF/SyntheticSections.h (+1-1) 
- (modified) lld/ELF/Writer.cpp (+2-1) 


``diff
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index f68403b69419f..34cca88ae63b0 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -533,17 +533,11 @@ void AArch64::relocate(uint8_t *loc, const Relocation 
&rel,
 write64(ctx, loc, val);
 break;
   case R_AARCH64_AUTH_ABS64:
-// If val is wider than 32 bits, the relocation must have been moved from
-// .relr.auth.dyn to .rela.dyn, and the addend write is not needed.
-//
-// If val fits in 32 bits, we have two potential scenarios:
-// * True RELR: Write the 32-bit `val`.
-// * RELA: Even if the value now fits in 32 bits, it might have been
-//   converted from RELR during an iteration in
-//   finalizeAddressDependentContent(). Writing the value is harmless
-//   because dynamic linking ignores it.
-if (isInt<32>(val))
-  write32(ctx, loc, val);
+// This is used for the addend of a .relr.auth.dyn entry,
+// which is a 32-bit value; the upper 32 bits are used to
+// encode the schema.
+checkInt(ctx, loc, val, 32, rel);
+write32(ctx, loc, val);
 break;
   case R_AARCH64_ADD_ABS_LO12_NC:
   case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
@@ -935,6 +929,8 @@ void AArch64::relocateAlloc(InputSection &sec, uint8_t 
*buf) const {
   AArch64Relaxer relaxer(ctx, sec.relocs());
   for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) {
 const Relocation &rel = sec.relocs()[i];
+if (rel.expr == R_NONE) // See finalizeAddressDependentContent()
+  continue;
 uint8_t *loc = buf + rel.offset;
 const uint64_t val = sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset);
 
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 72711aa75aec9..2b5897c9a40b0 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -585,7 +585,7 @@ struct RelativeReloc {
 return inputSec->getVA(inputSec->relocs()[relocIdx].offset);
   }
 
-  const InputSectionBase *inputSec;
+  InputSectionBase *inputSec;
   size_t relocIdx;
 };
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 083b4fb1dbd22..db5626e701ad6 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1583,9 +1583,10 @@ template <class ELFT> void
Writer<ELFT>::finalizeAddressDependentContent() {
   if (part.relrAuthDyn) {
 auto it = llvm::remove_if(
 part.relrAuthDyn->relocs, [this, &part](const RelativeReloc &elem) 
{
-  const Relocation &reloc = elem.inputSec->relocs()[elem.relocIdx];
+  Relocation &reloc = elem.inputSec->relocs()[elem.relocIdx];
   if (isInt<32>(reloc.sym->getVA(ctx, reloc.addend)))
 return false;
+  reloc.expr = R_NONE;
   part.relaDyn->addReloc({R_AARCH64_AUTH_RELATIVE, elem.inputSec,
   reloc.offset, false, *reloc.sym,
   reloc.addend, R_ABS});

``




https://github.com/llvm/llvm-project/pull/171192
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][AArch64] Replace R_AARCH64_AUTH_ABS64 addend hack (PR #171192)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lld-elf

Author: Jessica Clarke (jrtc27)


Changes

Rather than trying to infer deep down in AArch64::relocate whether we
actually need to write anything, we should instead mark the relocations we
no longer want so that we don't apply them. This is similar to how
X86_64::deleteFallThruJmpInsn works, although since the target is still
valid we don't need to mess with the offset, just the expr.


---
Full diff: https://github.com/llvm/llvm-project/pull/171192.diff


3 Files Affected:

- (modified) lld/ELF/Arch/AArch64.cpp (+7-11) 
- (modified) lld/ELF/SyntheticSections.h (+1-1) 
- (modified) lld/ELF/Writer.cpp (+2-1) 


``diff
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index f68403b69419f..34cca88ae63b0 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -533,17 +533,11 @@ void AArch64::relocate(uint8_t *loc, const Relocation 
&rel,
 write64(ctx, loc, val);
 break;
   case R_AARCH64_AUTH_ABS64:
-// If val is wider than 32 bits, the relocation must have been moved from
-// .relr.auth.dyn to .rela.dyn, and the addend write is not needed.
-//
-// If val fits in 32 bits, we have two potential scenarios:
-// * True RELR: Write the 32-bit `val`.
-// * RELA: Even if the value now fits in 32 bits, it might have been
-//   converted from RELR during an iteration in
-//   finalizeAddressDependentContent(). Writing the value is harmless
-//   because dynamic linking ignores it.
-if (isInt<32>(val))
-  write32(ctx, loc, val);
+// This is used for the addend of a .relr.auth.dyn entry,
+// which is a 32-bit value; the upper 32 bits are used to
+// encode the schema.
+checkInt(ctx, loc, val, 32, rel);
+write32(ctx, loc, val);
 break;
   case R_AARCH64_ADD_ABS_LO12_NC:
   case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
@@ -935,6 +929,8 @@ void AArch64::relocateAlloc(InputSection &sec, uint8_t 
*buf) const {
   AArch64Relaxer relaxer(ctx, sec.relocs());
   for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) {
 const Relocation &rel = sec.relocs()[i];
+if (rel.expr == R_NONE) // See finalizeAddressDependentContent()
+  continue;
 uint8_t *loc = buf + rel.offset;
 const uint64_t val = sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset);
 
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 72711aa75aec9..2b5897c9a40b0 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -585,7 +585,7 @@ struct RelativeReloc {
 return inputSec->getVA(inputSec->relocs()[relocIdx].offset);
   }
 
-  const InputSectionBase *inputSec;
+  InputSectionBase *inputSec;
   size_t relocIdx;
 };
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 083b4fb1dbd22..db5626e701ad6 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1583,9 +1583,10 @@ template <class ELFT> void
Writer<ELFT>::finalizeAddressDependentContent() {
   if (part.relrAuthDyn) {
 auto it = llvm::remove_if(
 part.relrAuthDyn->relocs, [this, &part](const RelativeReloc &elem) 
{
-  const Relocation &reloc = elem.inputSec->relocs()[elem.relocIdx];
+  Relocation &reloc = elem.inputSec->relocs()[elem.relocIdx];
   if (isInt<32>(reloc.sym->getVA(ctx, reloc.addend)))
 return false;
+  reloc.expr = R_NONE;
   part.relaDyn->addReloc({R_AARCH64_AUTH_RELATIVE, elem.inputSec,
   reloc.offset, false, *reloc.sym,
   reloc.addend, R_ABS});

``




https://github.com/llvm/llvm-project/pull/171192
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)

2025-12-08 Thread Gang Chen via llvm-branch-commits

https://github.com/cmc-rep edited 
https://github.com/llvm/llvm-project/pull/169476
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][AArch64] Replace R_AARCH64_AUTH_ABS64 addend hack (PR #171192)

2025-12-08 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 created 
https://github.com/llvm/llvm-project/pull/171192

Rather than trying to infer deep down in AArch64::relocate whether we
actually need to write anything, we should instead mark the relocations we
no longer want so that we don't apply them. This is similar to how
X86_64::deleteFallThruJmpInsn works, although since the target is still
valid we don't need to mess with the offset, just the expr.



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC] [FlowSensitive] Add mock unique_ptr header (PR #170942)

2025-12-08 Thread Jan Voung via llvm-branch-commits

https://github.com/jvoung approved this pull request.


https://github.com/llvm/llvm-project/pull/170942
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] Prepare libcxx and libcxxabi for pointer field protection. (PR #151651)

2025-12-08 Thread Oliver Hunt via llvm-branch-commits

ojhunt wrote:

> `std::trivially_relocate` and friends have been removed from C++26, and the
> new equivalent of `std::trivially_relocate` in C++29 is very likely to be
> based on `memcpy` or `memmove`. I am not sure how PFP can work with the
> future `std::trivially_relocate` at all, to be honest

I'm unsure why that is being assumed here - the assertion that "trivial" means 
memcpy is not supported by the existing usage of trivial in the specification. 
It would in fact mean that the meaning of "trivial" in "trivially_relocate" is 
different from every other usage in the standard.

https://github.com/llvm/llvm-project/pull/151651
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [CIR] Implement function personality attribute and its lowering (PR #171001)

2025-12-08 Thread Andy Kaylor via llvm-branch-commits

andykaylor wrote:

> What do you mean by CIRGenModule changes? I just added missing feature test, 
> as `setWindowsItaniumDLLImport` is unconditionally invoked on the path for 
> personality function generation, but it is still not supported.

Yeah, sorry for the confusion. I didn't notice that was the only change in 
CIRGenModule.

> I believe CIRGenException changes were already tested, I just moved
> personality function generation from LowerToLLVM.

The `clang/test/CIR/Lowering/eh-inflight.cir` test only covers the lowering 
from CIR case. There is no test that handles the generation of the personality 
attribute from C++ source.

https://github.com/llvm/llvm-project/pull/171001
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: Matt Arsenault (arsenm)


Changes

We have special-case handling for the logb builtins, so use them.
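
For reference, the IR in the new CHECK lines computes logb via llvm.frexp
with special cases for zero and infinity; a rough scalar C++ equivalent of
that shape (a sketch, not the header's actual code):

```cpp
#include <cmath>
#include <limits>

// logb(x) is the unbiased exponent of x as a float: frexp() decomposes
// x = m * 2^e with m in [0.5, 1), so the exponent logb reports is e - 1.
float logbf_sketch(float x) {
  if (std::isnan(x))
    return x; // NaN propagates
  if (x == 0.0f) // matches the select against 0.0 in the IR
    return -std::numeric_limits<float>::infinity();
  if (std::isinf(x)) // matches the fabs/ordered-compare in the IR
    return std::fabs(x);
  int e;
  std::frexp(x, &e);
  return static_cast<float>(e - 1);
}
```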

---
Full diff: https://github.com/llvm/llvm-project/pull/171186.diff


2 Files Affected:

- (modified) clang/lib/Headers/__clang_hip_math.h (+2-2) 
- (modified) clang/test/Headers/__clang_hip_math.hip (+116-46) 


``diff
diff --git a/clang/lib/Headers/__clang_hip_math.h 
b/clang/lib/Headers/__clang_hip_math.h
index 759e742c9d012..03c2721b4ad3c 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -498,7 +498,7 @@ __DEVICE__
 float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __builtin_log2f)(__x); 
}
 
 __DEVICE__
-float logbf(float __x) { return __ocml_logb_f32(__x); }
+float logbf(float __x) { return __builtin_logbf(__x); }
 
 __DEVICE__
 float logf(float __x) { return __FAST_OR_SLOW(__logf, __builtin_logf)(__x); }
@@ -901,7 +901,7 @@ __DEVICE__
 double log2(double __x) { return __ocml_log2_f64(__x); }
 
 __DEVICE__
-double logb(double __x) { return __ocml_logb_f64(__x); }
+double logb(double __x) { return __builtin_logb(__x); }
 
 __DEVICE__
 long int lrint(double __x) { return __builtin_rint(__x); }
diff --git a/clang/test/Headers/__clang_hip_math.hip 
b/clang/test/Headers/__clang_hip_math.hip
index 4163666811c91..426e5af319cbf 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -3871,69 +3871,139 @@ extern "C" __device__ double test_log2(double x) {
   return log2(x);
 }
 
-// DEFAULT-LABEL: define dso_local noundef float @test_logbf(
-// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-LABEL: define dso_local float @test_logbf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
-// DEFAULT-NEXT:[[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_logb_f32(float noundef [[X]]) #[[ATTR13]]
-// DEFAULT-NEXT:ret float [[CALL_I]]
-//
-// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float 
@test_logbf(
-// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) 
local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT:[[TMP0:%.*]] = tail call { float, i32 } 
@llvm.frexp.f32.i32(float [[X]])
+// DEFAULT-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// DEFAULT-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// DEFAULT-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// DEFAULT-NEXT:[[TMP4:%.*]] = tail call contract float 
@llvm.fabs.f32(float [[X]])
+// DEFAULT-NEXT:[[TMP5:%.*]] = fcmp contract one float [[TMP4]], 
0x7FF0
+// DEFAULT-NEXT:[[TMP6:%.*]] = select contract i1 [[TMP5]], float 
[[TMP3]], float [[TMP4]]
+// DEFAULT-NEXT:[[TMP7:%.*]] = fcmp contract oeq float [[X]], 0.00e+00
+// DEFAULT-NEXT:[[TMP8:%.*]] = select contract i1 [[TMP7]], float 
0xFFF0, float [[TMP6]]
+// DEFAULT-NEXT:ret float [[TMP8]]
+//
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_logbf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) 
local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
-// FINITEONLY-NEXT:[[CALL_I:%.*]] = tail call nnan ninf contract noundef 
nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf) 
[[X]]) #[[ATTR13]]
-// FINITEONLY-NEXT:ret float [[CALL_I]]
+// FINITEONLY-NEXT:[[TMP0:%.*]] = tail call { float, i32 } 
@llvm.frexp.f32.i32(float nofpclass(nan inf) [[X]])
+// FINITEONLY-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// FINITEONLY-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// FINITEONLY-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// FINITEONLY-NEXT:ret float [[TMP3]]
 //
-// APPROX-LABEL: define dso_local noundef float @test_logbf(
-// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-LABEL: define dso_local float @test_logbf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
-// APPROX-NEXT:[[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_logb_f32(float noundef [[X]]) #[[ATTR13]]
-// APPROX-NEXT:ret float [[CALL_I]]
-//
-// NCRDIV-LABEL: define dso_local noundef float @test_logbf(
-// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT:[[TMP0:%.*]] = tail call { float, i32 } 
@llvm.frexp.f32.i32(float [[X]])
+// APPROX-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// APPROX-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// APPROX-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// APPROX-NEXT:[[TMP4:%.*]] = tail call contract float 
@llvm.fabs.f32(float [[X]])
+// APPROX-NEXT:[[TMP5:%.*]] = fcmp contract one float [[TMP4]], 
0x7FF0
+// APPROX-NEXT:[[TMP6:%.*]] = select contract i1 [[TMP5]], float [[TMP3]], 
float [[TMP4]]
+//

[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Matt Arsenault (arsenm)


Changes

We have special-case handling for the logb builtins, so use them.

---
Full diff: https://github.com/llvm/llvm-project/pull/171186.diff


2 Files Affected:

- (modified) clang/lib/Headers/__clang_hip_math.h (+2-2) 
- (modified) clang/test/Headers/__clang_hip_math.hip (+116-46) 


``diff
diff --git a/clang/lib/Headers/__clang_hip_math.h 
b/clang/lib/Headers/__clang_hip_math.h
index 759e742c9d012..03c2721b4ad3c 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -498,7 +498,7 @@ __DEVICE__
 float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __builtin_log2f)(__x); 
}
 
 __DEVICE__
-float logbf(float __x) { return __ocml_logb_f32(__x); }
+float logbf(float __x) { return __builtin_logbf(__x); }
 
 __DEVICE__
 float logf(float __x) { return __FAST_OR_SLOW(__logf, __builtin_logf)(__x); }
@@ -901,7 +901,7 @@ __DEVICE__
 double log2(double __x) { return __ocml_log2_f64(__x); }
 
 __DEVICE__
-double logb(double __x) { return __ocml_logb_f64(__x); }
+double logb(double __x) { return __builtin_logb(__x); }
 
 __DEVICE__
 long int lrint(double __x) { return __builtin_rint(__x); }
diff --git a/clang/test/Headers/__clang_hip_math.hip 
b/clang/test/Headers/__clang_hip_math.hip
index 4163666811c91..426e5af319cbf 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -3871,69 +3871,139 @@ extern "C" __device__ double test_log2(double x) {
   return log2(x);
 }
 
-// DEFAULT-LABEL: define dso_local noundef float @test_logbf(
-// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-LABEL: define dso_local float @test_logbf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // DEFAULT-NEXT:  [[ENTRY:.*:]]
-// DEFAULT-NEXT:[[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_logb_f32(float noundef [[X]]) #[[ATTR13]]
-// DEFAULT-NEXT:ret float [[CALL_I]]
-//
-// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float 
@test_logbf(
-// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) 
local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT:[[TMP0:%.*]] = tail call { float, i32 } 
@llvm.frexp.f32.i32(float [[X]])
+// DEFAULT-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// DEFAULT-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// DEFAULT-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// DEFAULT-NEXT:[[TMP4:%.*]] = tail call contract float 
@llvm.fabs.f32(float [[X]])
+// DEFAULT-NEXT:[[TMP5:%.*]] = fcmp contract one float [[TMP4]], 
0x7FF0
+// DEFAULT-NEXT:[[TMP6:%.*]] = select contract i1 [[TMP5]], float 
[[TMP3]], float [[TMP4]]
+// DEFAULT-NEXT:[[TMP7:%.*]] = fcmp contract oeq float [[X]], 0.00e+00
+// DEFAULT-NEXT:[[TMP8:%.*]] = select contract i1 [[TMP7]], float 
0xFFF0, float [[TMP6]]
+// DEFAULT-NEXT:ret float [[TMP8]]
+//
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_logbf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) 
local_unnamed_addr #[[ATTR3]] {
 // FINITEONLY-NEXT:  [[ENTRY:.*:]]
-// FINITEONLY-NEXT:[[CALL_I:%.*]] = tail call nnan ninf contract noundef 
nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf) 
[[X]]) #[[ATTR13]]
-// FINITEONLY-NEXT:ret float [[CALL_I]]
+// FINITEONLY-NEXT:[[TMP0:%.*]] = tail call { float, i32 } 
@llvm.frexp.f32.i32(float nofpclass(nan inf) [[X]])
+// FINITEONLY-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// FINITEONLY-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// FINITEONLY-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// FINITEONLY-NEXT:ret float [[TMP3]]
 //
-// APPROX-LABEL: define dso_local noundef float @test_logbf(
-// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-LABEL: define dso_local float @test_logbf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
 // APPROX-NEXT:  [[ENTRY:.*:]]
-// APPROX-NEXT:[[CALL_I:%.*]] = tail call contract noundef float 
@__ocml_logb_f32(float noundef [[X]]) #[[ATTR13]]
-// APPROX-NEXT:ret float [[CALL_I]]
-//
-// NCRDIV-LABEL: define dso_local noundef float @test_logbf(
-// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT:[[TMP0:%.*]] = tail call { float, i32 } 
@llvm.frexp.f32.i32(float [[X]])
+// APPROX-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// APPROX-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// APPROX-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// APPROX-NEXT:[[TMP4:%.*]] = tail call contract float 
@llvm.fabs.f32(float [[X]])
+// APPROX-NEXT:[[TMP5:%.*]] = fcmp contract one float [[TMP4]], 
0x7FF0
+// APPROX-NEXT:[[TMP6:%.*]] = select contract i1 [[TMP5]], float [[TMP3]], 
float [[TMP4]]
+// APPRO

[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)

2025-12-08 Thread Joseph Huber via llvm-branch-commits

https://github.com/jhuber6 approved this pull request.


https://github.com/llvm/llvm-project/pull/171186
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LTT] Add `unknown` branch weights when lowering type tests with conditional (PR #170752)

2025-12-08 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/170752

>From ea48cf14b27d5ec6d0cbe02aaa6f10f5fa9299d0 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Thu, 4 Dec 2025 13:48:43 -0800
Subject: [PATCH] [LTT] Add `unknown` branch weights when lowering type tests
 with conditional

---
 llvm/lib/Transforms/IPO/LowerTypeTests.cpp|  6 -
 llvm/test/Transforms/LowerTypeTests/import.ll | 23 +++
 llvm/utils/profcheck-xfail.txt|  2 --
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp 
b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index f7aeda95e41b3..06deea8ba5848 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -48,12 +48,14 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/IR/ModuleSummaryIndexYAML.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/ReplaceConstant.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
@@ -802,7 +804,9 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata 
*TypeId, CallInst *CI,
 return createBitSetTest(ThenB, TIL, BitOffset);
   }
 
-  IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false));
+  MDBuilder MDB(M.getContext());
+  IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false,
+  
MDB.createLikelyBranchWeights()));
 
   // Now that we know that the offset is in range and aligned, load the
   // appropriate bit from the bitset.
diff --git a/llvm/test/Transforms/LowerTypeTests/import.ll 
b/llvm/test/Transforms/LowerTypeTests/import.ll
index e3c2d8a3d3e8c..1583dda58cddc 100644
--- a/llvm/test/Transforms/LowerTypeTests/import.ll
+++ b/llvm/test/Transforms/LowerTypeTests/import.ll
@@ -92,7 +92,7 @@ define i1 @bytearray7(ptr %p) {
 ; X86-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr 
@__typeid_bytearray7_global_addr to i64), [[TMP1]]
 ; X86-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 
[[TMP2]], i64 ptrtoint (ptr @__typeid_bytearray7_align to i64))
 ; X86-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], ptrtoint (ptr 
@__typeid_bytearray7_size_m1 to i64)
-; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof 
[[PROF6:![0-9]+]]
 ; X86:   5:
 ; X86-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr 
@__typeid_bytearray7_byte_array, i64 [[TMP3]]
 ; X86-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -109,7 +109,7 @@ define i1 @bytearray7(ptr %p) {
 ; ARM-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr 
@__typeid_bytearray7_global_addr to i64), [[TMP1]]
 ; ARM-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 
[[TMP2]], i64 3)
 ; ARM-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], 43
-; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof 
[[PROF0:![0-9]+]]
 ; ARM:   5:
 ; ARM-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr 
@__typeid_bytearray7_byte_array, i64 [[TMP3]]
 ; ARM-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -131,7 +131,7 @@ define i1 @bytearray32(ptr %p) {
 ; X86-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr 
@__typeid_bytearray32_global_addr to i64), [[TMP1]]
 ; X86-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 
[[TMP2]], i64 ptrtoint (ptr @__typeid_bytearray32_align to i64))
 ; X86-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], ptrtoint (ptr 
@__typeid_bytearray32_size_m1 to i64)
-; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof 
[[PROF6]]
 ; X86:   5:
 ; X86-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr 
@__typeid_bytearray32_byte_array, i64 [[TMP3]]
 ; X86-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -148,7 +148,7 @@ define i1 @bytearray32(ptr %p) {
 ; ARM-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr 
@__typeid_bytearray32_global_addr to i64), [[TMP1]]
 ; ARM-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 
[[TMP2]], i64 4)
 ; ARM-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], 12346
-; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof 
[[PROF0]]
 ; ARM:   5:
 ; ARM-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr 
@__typeid_bytearray32_byte_array, i64 [[TMP3]]
 ; ARM-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -170,7 +170,7 @@ define i1 @inline5(ptr %p) {
 ; X86-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr 
@__t

[llvm-branch-commits] [llvm] [LTT] Add `unknown` branch weights when lowering type tests with conditional (PR #170752)

2025-12-08 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/170752

>From ea48cf14b27d5ec6d0cbe02aaa6f10f5fa9299d0 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Thu, 4 Dec 2025 13:48:43 -0800
Subject: [PATCH] [LTT] Add `unknown` branch weights when lowering type tests
 with conditional

---
 llvm/lib/Transforms/IPO/LowerTypeTests.cpp|  6 -
 llvm/test/Transforms/LowerTypeTests/import.ll | 23 +++
 llvm/utils/profcheck-xfail.txt|  2 --
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp 
b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index f7aeda95e41b3..06deea8ba5848 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -48,12 +48,14 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/IR/ModuleSummaryIndexYAML.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/ReplaceConstant.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
@@ -802,7 +804,9 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata 
*TypeId, CallInst *CI,
 return createBitSetTest(ThenB, TIL, BitOffset);
   }
 
-  IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false));
+  MDBuilder MDB(M.getContext());
+  IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false,
+  
MDB.createLikelyBranchWeights()));
 
   // Now that we know that the offset is in range and aligned, load the
   // appropriate bit from the bitset.
diff --git a/llvm/test/Transforms/LowerTypeTests/import.ll 
b/llvm/test/Transforms/LowerTypeTests/import.ll
index e3c2d8a3d3e8c..1583dda58cddc 100644
--- a/llvm/test/Transforms/LowerTypeTests/import.ll
+++ b/llvm/test/Transforms/LowerTypeTests/import.ll
@@ -92,7 +92,7 @@ define i1 @bytearray7(ptr %p) {
 ; X86-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr 
@__typeid_bytearray7_global_addr to i64), [[TMP1]]
 ; X86-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 
[[TMP2]], i64 ptrtoint (ptr @__typeid_bytearray7_align to i64))
 ; X86-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], ptrtoint (ptr 
@__typeid_bytearray7_size_m1 to i64)
-; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof 
[[PROF6:![0-9]+]]
 ; X86:   5:
 ; X86-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr 
@__typeid_bytearray7_byte_array, i64 [[TMP3]]
 ; X86-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -109,7 +109,7 @@ define i1 @bytearray7(ptr %p) {
 ; ARM-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr 
@__typeid_bytearray7_global_addr to i64), [[TMP1]]
 ; ARM-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 
[[TMP2]], i64 3)
 ; ARM-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], 43
-; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof 
[[PROF0:![0-9]+]]
 ; ARM:   5:
 ; ARM-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr 
@__typeid_bytearray7_byte_array, i64 [[TMP3]]
 ; ARM-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -131,7 +131,7 @@ define i1 @bytearray32(ptr %p) {
 ; X86-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr 
@__typeid_bytearray32_global_addr to i64), [[TMP1]]
 ; X86-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 
[[TMP2]], i64 ptrtoint (ptr @__typeid_bytearray32_align to i64))
 ; X86-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], ptrtoint (ptr 
@__typeid_bytearray32_size_m1 to i64)
-; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof 
[[PROF6]]
 ; X86:   5:
 ; X86-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr 
@__typeid_bytearray32_byte_array, i64 [[TMP3]]
 ; X86-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -148,7 +148,7 @@ define i1 @bytearray32(ptr %p) {
 ; ARM-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr 
@__typeid_bytearray32_global_addr to i64), [[TMP1]]
 ; ARM-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64 
[[TMP2]], i64 4)
 ; ARM-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], 12346
-; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof 
[[PROF0]]
 ; ARM:   5:
 ; ARM-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr 
@__typeid_bytearray32_byte_array, i64 [[TMP3]]
 ; ARM-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -170,7 +170,7 @@ define i1 @inline5(ptr %p) {
 ; X86-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr 
@__t

[llvm-branch-commits] [llvm] release/21.x: [WebAssembly] Remove FAKE_USEs before ExplicitLocals (#160768) (PR #171184)

2025-12-08 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/171184
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)

2025-12-08 Thread Shilei Tian via llvm-branch-commits


@@ -0,0 +1,425 @@
+//===-- AMDGPUMachineLevelInliner.cpp - AMDGPU Machine Level Inliner ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "AMDGPUMachineLevelInliner.h"
+#include "AMDGPU.h"
+#include "AMDGPUMachineModuleInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManagers.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-machine-level-inliner"
+
+namespace {
+class AMDGPUInliningPassManager : public FPPassManager {
+public:
+  static char ID;
+
+  explicit AMDGPUInliningPassManager() : FPPassManager(ID) {}
+
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  bool doFinalization(Module &M) override;
+
+  StringRef getPassName() const override {
+return "AMDGPU Inlining Pass Manager";
+  }
+};
+
+/// AMDGPUInliningAnchor - A machine function pass that serves as an anchor for
+/// setting up the AMDGPU inlining pass manager infrastructure. It makes sure
+/// the inliner is run via an AMDGPUInliningPassManager. It can be run well in
+/// advance of the inliner as long as there are only FunctionPasses in between.
+class AMDGPUInliningAnchor : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification
+
+  AMDGPUInliningAnchor() : MachineFunctionPass(ID) {}
+
+  // We don't really need to process any functions here.
+  bool runOnMachineFunction(MachineFunction &MF) override { return false; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  StringRef getPassName() const override;
+
+  /// Prepare the pass manager stack for the inliner. This will push an
+  /// `AMDGPUInliningPassManager` onto the stack.
+  void preparePassManager(PMStack &Stack) override;
+};
+
+} // end anonymous namespace.
+
+// Pass identification
+char AMDGPUMachineLevelInliner::ID = 0;
+char AMDGPUInliningPassManager::ID = 0;
+char AMDGPUInliningAnchor::ID = 0;
+
+char &llvm::AMDGPUMachineLevelInlinerID = AMDGPUMachineLevelInliner::ID;
+char &llvm::AMDGPUInliningAnchorID = AMDGPUInliningAnchor::ID;
+
+INITIALIZE_PASS_BEGIN(AMDGPUMachineLevelInliner, DEBUG_TYPE,
+  "AMDGPU Machine Level Inliner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AMDGPUInliningAnchor)
+INITIALIZE_PASS_END(AMDGPUMachineLevelInliner, DEBUG_TYPE,
+"AMDGPU Machine Level Inliner", false, false)
+
+INITIALIZE_PASS_BEGIN(AMDGPUInliningAnchor, "amdgpu-inlining-anchor",
+  "AMDGPU Inlining Anchor", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(AMDGPUInliningAnchor, "amdgpu-inlining-anchor",
+"AMDGPU Inlining Anchor", false, true)
+
+AMDGPUMachineLevelInliner::AMDGPUMachineLevelInliner()
+: MachineFunctionPass(ID) {
+  initializeAMDGPUMachineLevelInlinerPass(*PassRegistry::getPassRegistry());
+}
+
+void AMDGPUMachineLevelInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineModuleInfoWrapperPass>();
+  AU.addRequired<AMDGPUInliningAnchor>();
+  AU.addPreserved<MachineModuleInfoWrapperPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool AMDGPUMachineLevelInliner::runOnMachineFunction(MachineFunction &MF) {
+  MachineModuleInfo &MMI = 
getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+  Function &F = MF.getFunction();
+  if (shouldInlineCallsTo(F)) {
+// Mark the function as machine-inlined in AMDGPUMachineModuleInfo. This
+// tells the inlining pass manager to stop processing it.
+auto &AMMMI = MMI.getObjFileInfo<AMDGPUMachineModuleInfo>();
+AMMMI.addMachineInlinedFunction(F);
+
+return false;
+  }
+
+  bool Changed = false;
+
+  // Can't inline anything if there aren't any calls.
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  if (!MFI.hasCalls() && !MFI.hasTailCall())
+return false;
+
+  // Collect calls to inline.
+  SmallVector<MachineInstr *> CallsToInline;
+  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+
+  for (auto &MBB : MF) {
+for (auto &MI : MBB) {
+  if (!MI.isCall())
+continue;
+
+  const MachineOperand *CalleeOp =
+  TII->getNamedOperand(MI,

[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)

2025-12-08 Thread Shilei Tian via llvm-branch-commits


@@ -0,0 +1,59 @@
+//===-- AMDGPUMachineLevelInliner.h - AMDGPU Machine Level Inliner -*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file defines the AMDGPUMachineLevelInliner pass, which performs
+// machine-level inlining for AMDGPU targets.

shiltian wrote:

Is this something that can be generic?

https://github.com/llvm/llvm-project/pull/169476
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)

2025-12-08 Thread Shilei Tian via llvm-branch-commits

https://github.com/shiltian edited 
https://github.com/llvm/llvm-project/pull/169476
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [FlowSensitive] [StatusOr] [12/N] Add support for smart pointers (PR #170943)

2025-12-08 Thread Jan Voung via llvm-branch-commits

https://github.com/jvoung approved this pull request.


https://github.com/llvm/llvm-project/pull/170943
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF] Move mergeRels/partitionRels into finalizeContents (PR #171203)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lld

Author: Jessica Clarke (jrtc27)


Changes

Other than the ordering requirements that remain between sections, this
abstracts the details of how these sections are implemented.

Note that isNeeded already checks relocsVec for both section types, so
finalizeSynthetic can call it before mergeRels just fine.
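
The shape of the refactor in miniature (illustrative class names, not lld's
full hierarchy): per-section setup moves out of the Writer and behind the
section's own finalizeContents() hook.

```cpp
#include <vector>

struct SyntheticSectionBase {
  virtual ~SyntheticSectionBase() = default;
  virtual void finalizeContents() = 0;
};

struct RelocSection : SyntheticSectionBase {
  void finalizeContents() override {
    mergeRels();     // previously invoked directly by the Writer
    partitionRels(); // DT_RELACOUNT-style bookkeeping, now internal
  }

private:
  void mergeRels() {}      // implementation details hidden from the Writer
  void partitionRels() {}
};

// The Writer's loop becomes uniform across section kinds:
void finalizeAll(const std::vector<SyntheticSectionBase *> &sections) {
  for (auto *s : sections)
    if (s) // mirrors the null check on optional per-partition sections
      s->finalizeContents();
}
```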


---
Full diff: https://github.com/llvm/llvm-project/pull/171203.diff


3 Files Affected:

- (modified) lld/ELF/SyntheticSections.cpp (+5) 
- (modified) lld/ELF/SyntheticSections.h (+4-3) 
- (modified) lld/ELF/Writer.cpp (+3-14) 


``diff
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index d6e214f9d0f48..c81f649861a73 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1707,6 +1707,9 @@ void RelocationBaseSection::partitionRels() {
 }
 
 void RelocationBaseSection::finalizeContents() {
+  mergeRels();
+  // Compute DT_RELACOUNT to be used by part.dynamic.
+  partitionRels();
   SymbolTableBaseSection *symTab = getPartition(ctx).dynSymTab.get();
 
   // When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE
@@ -1796,6 +1799,8 @@ void RelrBaseSection::mergeRels() {
   relocsVec.clear();
 }
 
+void RelrBaseSection::finalizeContents() { mergeRels(); }
+
 template <class ELFT>
 AndroidPackedRelocationSection<ELFT>::AndroidPackedRelocationSection(
 Ctx &ctx, StringRef name, unsigned concurrency)
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 2b5897c9a40b0..8971d5f6c2349 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -530,14 +530,14 @@ class RelocationBaseSection : public SyntheticSection {
   }
   size_t getSize() const override { return relocs.size() * this->entsize; }
   size_t getRelativeRelocCount() const { return numRelativeRelocs; }
-  void mergeRels();
-  void partitionRels();
   void finalizeContents() override;
 
   int32_t dynamicTag, sizeDynamicTag;
   SmallVector<DynamicReloc, 0> relocs;
 
 protected:
+  void mergeRels();
+  void partitionRels();
   void computeRels();
   // Used when parallel relocation scanning adds relocations. The elements
   // will be moved into relocs by mergeRel().
@@ -608,14 +608,15 @@ class RelrBaseSection : public SyntheticSection {
 isec.addReloc({expr, addendRelType, offsetInSec, addend, &sym});
 addReloc({&isec, isec.relocs().size() - 1});
   }
-  void mergeRels();
   bool isNeeded() const override {
 return !relocs.empty() ||
llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
   }
+  void finalizeContents() override;
   SmallVector<RelativeReloc, 0> relocs;
 
 protected:
+  void mergeRels();
   SmallVector<SmallVector<RelativeReloc, 0>, 0> relocsVec;
 };
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index db5626e701ad6..57202f42cce5b 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -2111,20 +2111,9 @@ template <class ELFT> void
Writer<ELFT>::finalizeSections() {
 // Dynamic section must be the last one in this list and dynamic
 // symbol table section (dynSymTab) must be the first one.
 for (Partition &part : ctx.partitions) {
-  if (part.relaDyn) {
-part.relaDyn->mergeRels();
-// Compute DT_RELACOUNT to be used by part.dynamic.
-part.relaDyn->partitionRels();
-finalizeSynthetic(ctx, part.relaDyn.get());
-  }
-  if (part.relrDyn) {
-part.relrDyn->mergeRels();
-finalizeSynthetic(ctx, part.relrDyn.get());
-  }
-  if (part.relrAuthDyn) {
-part.relrAuthDyn->mergeRels();
-finalizeSynthetic(ctx, part.relrAuthDyn.get());
-  }
+  finalizeSynthetic(ctx, part.relaDyn.get());
+  finalizeSynthetic(ctx, part.relrDyn.get());
+  finalizeSynthetic(ctx, part.relrAuthDyn.get());
 
   finalizeSynthetic(ctx, part.dynSymTab.get());
   finalizeSynthetic(ctx, part.gnuHashTab.get());

``




https://github.com/llvm/llvm-project/pull/171203
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF] Move mergeRels/partitionRels into finalizeContents (PR #171203)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lld-elf

Author: Jessica Clarke (jrtc27)


Changes

Other than the ordering requirements that remain between sections, this
abstracts the details of how these sections are implemented.

Note that isNeeded already checks relocsVec for both section types, so
finalizeSynthetic can call it before mergeRels just fine.


---
Full diff: https://github.com/llvm/llvm-project/pull/171203.diff


3 Files Affected:

- (modified) lld/ELF/SyntheticSections.cpp (+5) 
- (modified) lld/ELF/SyntheticSections.h (+4-3) 
- (modified) lld/ELF/Writer.cpp (+3-14) 


``diff
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index d6e214f9d0f48..c81f649861a73 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1707,6 +1707,9 @@ void RelocationBaseSection::partitionRels() {
 }
 
 void RelocationBaseSection::finalizeContents() {
+  mergeRels();
+  // Compute DT_RELACOUNT to be used by part.dynamic.
+  partitionRels();
   SymbolTableBaseSection *symTab = getPartition(ctx).dynSymTab.get();
 
   // When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE
@@ -1796,6 +1799,8 @@ void RelrBaseSection::mergeRels() {
   relocsVec.clear();
 }
 
+void RelrBaseSection::finalizeContents() { mergeRels(); }
+
 template 
 AndroidPackedRelocationSection::AndroidPackedRelocationSection(
 Ctx &ctx, StringRef name, unsigned concurrency)
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 2b5897c9a40b0..8971d5f6c2349 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -530,14 +530,14 @@ class RelocationBaseSection : public SyntheticSection {
   }
   size_t getSize() const override { return relocs.size() * this->entsize; }
   size_t getRelativeRelocCount() const { return numRelativeRelocs; }
-  void mergeRels();
-  void partitionRels();
   void finalizeContents() override;
 
   int32_t dynamicTag, sizeDynamicTag;
   SmallVector<DynamicReloc, 0> relocs;
 
 protected:
+  void mergeRels();
+  void partitionRels();
   void computeRels();
   // Used when parallel relocation scanning adds relocations. The elements
   // will be moved into relocs by mergeRel().
@@ -608,14 +608,15 @@ class RelrBaseSection : public SyntheticSection {
 isec.addReloc({expr, addendRelType, offsetInSec, addend, &sym});
 addReloc({&isec, isec.relocs().size() - 1});
   }
-  void mergeRels();
   bool isNeeded() const override {
 return !relocs.empty() ||
llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
   }
+  void finalizeContents() override;
   SmallVector<RelativeReloc, 0> relocs;
 
 protected:
+  void mergeRels();
   SmallVector<SmallVector<RelativeReloc, 0>, 0> relocsVec;
 };
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index db5626e701ad6..57202f42cce5b 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -2111,20 +2111,9 @@ template <class ELFT> void
Writer<ELFT>::finalizeSections() {
 // Dynamic section must be the last one in this list and dynamic
 // symbol table section (dynSymTab) must be the first one.
 for (Partition &part : ctx.partitions) {
-  if (part.relaDyn) {
-part.relaDyn->mergeRels();
-// Compute DT_RELACOUNT to be used by part.dynamic.
-part.relaDyn->partitionRels();
-finalizeSynthetic(ctx, part.relaDyn.get());
-  }
-  if (part.relrDyn) {
-part.relrDyn->mergeRels();
-finalizeSynthetic(ctx, part.relrDyn.get());
-  }
-  if (part.relrAuthDyn) {
-part.relrAuthDyn->mergeRels();
-finalizeSynthetic(ctx, part.relrAuthDyn.get());
-  }
+  finalizeSynthetic(ctx, part.relaDyn.get());
+  finalizeSynthetic(ctx, part.relrDyn.get());
+  finalizeSynthetic(ctx, part.relrAuthDyn.get());
 
   finalizeSynthetic(ctx, part.dynSymTab.get());
   finalizeSynthetic(ctx, part.gnuHashTab.get());

``




https://github.com/llvm/llvm-project/pull/171203
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF] Move mergeRels/partitionRels into finalizeContents (PR #171203)

2025-12-08 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 created 
https://github.com/llvm/llvm-project/pull/171203

Other than the ordering requirements that remain between sections, this
abstracts the details of how these sections are implemented.

Note that isNeeded already checks relocsVec for both section types, so
finalizeSynthetic can call it before mergeRels just fine.



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][ELF][AArch64][PAC] Teach addRelativeReloc to emit R_AARCH64_AUTH_RELATIVE (PR #171180)

2025-12-08 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 edited 
https://github.com/llvm/llvm-project/pull/171180
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF][AArch64][PAC] Use existing addSymbolReloc for R_AARCH64_AUTH_ABS64 (PR #171179)

2025-12-08 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 edited 
https://github.com/llvm/llvm-project/pull/171179
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [ELF][AArch64][PAC] Replace R_AARCH64_AUTH_ABS64 addend hack (PR #171192)

2025-12-08 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 edited 
https://github.com/llvm/llvm-project/pull/171192
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF][AArch64][MTE] Replace addend hack with less-confusing code (PR #171182)

2025-12-08 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 edited 
https://github.com/llvm/llvm-project/pull/171182
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF][AArch64][MTE] Don't duplicate addRelativeReloc call for MTE globals (PR #171181)

2025-12-08 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 edited 
https://github.com/llvm/llvm-project/pull/171181
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] VectorCombine: Improve the insert/extract fold in the narrowing case (PR #168820)

2025-12-08 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle updated 
https://github.com/llvm/llvm-project/pull/168820

From 2ee921f4d07bd5b457d5e79a848ab6183c3a8c52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= 
Date: Wed, 19 Nov 2025 18:00:32 -0800
Subject: [PATCH] VectorCombine: Improve the insert/extract fold in the
 narrowing case

Keeping the extracted element in a natural position in the narrowed
vector has two beneficial effects:

1. It makes the narrowing shuffles cheaper (at least on AMDGPU), which
   allows the insert/extract fold to trigger.
2. It makes the narrowing shuffles in a chain of extract/insert
   compatible, which allows foldLengthChangingShuffles to successfully
   recognize a chain that can be folded.

There are minor X86 test changes that look reasonable to me. The IR
change for AVX2 in 
llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
doesn't change the assembly generated by `llc -mtriple=x86_64-- -mattr=AVX2`
at all.

commit-id:c151bb04
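
Aside (not part of the patch): the key index math is ExtIdx % NumDstElts; a
tiny stand-alone check of what that means for the AMDGPU test below:

```cpp
#include <cassert>

// When narrowing a vector from NumSrcElts to NumDstElts lanes, keep the
// extracted source lane ExtIdx at the "equivalent" destination position
// ExtIdx % NumDstElts instead of forcing it into lane 0.
int narrowedLane(int extIdx, int numDstElts) {
  assert(numDstElts > 0);
  return extIdx % numDstElts;
}

int main() {
  // Extracting lane 17 of a <32 x i8> into an <8 x i8>: 17 % 8 == 1, which
  // is why the updated masks keep that element at position 1 (and why
  // chained narrowing shuffles become compatible with each other).
  assert(narrowedLane(17, 8) == 1);
}
```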
---
 .../Transforms/Vectorize/VectorCombine.cpp| 22 +--
 .../VectorCombine/AMDGPU/extract-insert-i8.ll | 18 ++-
 .../X86/extract-insert-poison.ll  | 12 ++
 .../VectorCombine/X86/extract-insert.ll   |  8 +++
 .../Transforms/VectorCombine/X86/pr126085.ll  |  4 ++--
 5 files changed, 22 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b83597fec021a..4b081205eba10 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -4558,22 +4558,15 @@ bool 
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   SmallVector<int> Mask(NumDstElts, PoisonMaskElem);
 
   bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
-  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
   bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec);
   if (NeedDstSrcSwap) {
 SK = TargetTransformInfo::SK_PermuteSingleSrc;
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
-  Mask[InsIdx] = 0;
-else
-  Mask[InsIdx] = ExtIdx;
+Mask[InsIdx] = ExtIdx % NumDstElts;
 std::swap(DstVec, SrcVec);
   } else {
 SK = TargetTransformInfo::SK_PermuteTwoSrc;
 std::iota(Mask.begin(), Mask.end(), 0);
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
-  Mask[InsIdx] = NumDstElts;
-else
-  Mask[InsIdx] = ExtIdx + NumDstElts;
+Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
   }
 
   // Cost
@@ -4594,14 +4587,11 @@ bool 
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   NewCost += TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind, 0,
 nullptr, {DstVec, SrcVec});
   } else {
-// When creating length-changing-vector, always create with a Mask whose
-// first element has an ExtIdx, so that the first element of the vector
-// being created is always the target to be extracted.
+// When creating a length-changing-vector, always try to keep the relevant
+// element in an equivalent position, so that bulk shuffles are more likely
+// to be useful.
 ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
-if (IsExtIdxInBounds)
-  ExtToVecMask[ExtIdx] = ExtIdx;
-else
-  ExtToVecMask[0] = ExtIdx;
+ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
 // Add cost for expanding or narrowing
 NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
  DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll 
b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
index 8c2455dd9d375..6c92892949175 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
@@ -88,22 +88,8 @@ entry:
 define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) {
 ; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening(
 ; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:[[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17
-; OPT-NEXT:[[I_2:%.*]] = extractelement <32 x i8> [[IN]], i64 18
-; OPT-NEXT:[[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19
-; OPT-NEXT:[[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21
-; OPT-NEXT:[[I_6:%.*]] = extractelement <32 x i8> [[IN]], i64 22
-; OPT-NEXT:[[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23
-; OPT-NEXT:[[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, 
<8 x i32> 
-; OPT-NEXT:[[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1
-; OPT-NEXT:[[O_2:%.*]] = insertelement <8 x i8> [[O_1]], i8 [[I_2]], i32 2
-; OPT-NEXT:[[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3
-; OPT-NEXT:[[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> 
poison, <8 x i32> 
-; OPT-NEXT:[[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8> 
[[TMP1]], <

[llvm-branch-commits] [llvm] VectorCombine: Improve the insert/extract fold in the narrowing case (PR #168820)

2025-12-08 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle updated 
https://github.com/llvm/llvm-project/pull/168820

From 2ee921f4d07bd5b457d5e79a848ab6183c3a8c52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= 
Date: Wed, 19 Nov 2025 18:00:32 -0800
Subject: [PATCH] VectorCombine: Improve the insert/extract fold in the
 narrowing case

Keeping the extracted element in a natural position in the narrowed
vector has two beneficial effects:

1. It makes the narrowing shuffles cheaper (at least on AMDGPU), which
   allows the insert/extract fold to trigger.
2. It makes the narrowing shuffles in a chain of extract/insert
   compatible, which allows foldLengthChangingShuffles to successfully
   recognize a chain that can be folded.

There are minor X86 test changes that look reasonable to me. The IR
change for AVX2 in 
llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
doesn't change the assembly generated by `llc -mtriple=x86_64-- -mattr=AVX2`
at all.
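
For intuition, here is a minimal standalone sketch of the new lane placement; the names below are illustrative and this is not the actual VectorCombine code:

```cpp
// Minimal sketch, assuming a narrowing extract/insert with
// NumSrcElts = 32, NumDstElts = 8, ExtIdx = 17; PoisonElem stands in
// for PoisonMaskElem.
#include <cstdio>
#include <vector>

constexpr int PoisonElem = -1;

std::vector<int> narrowingMask(unsigned NumDstElts, unsigned ExtIdx) {
  std::vector<int> Mask(NumDstElts, PoisonElem);
  // The old code forced an out-of-bounds element into lane 0; the new
  // code keeps it in the equivalent lane ExtIdx % NumDstElts, so chained
  // narrowing shuffles end up with compatible masks.
  Mask[ExtIdx % NumDstElts] = static_cast<int>(ExtIdx);
  return Mask;
}

int main() {
  for (int Lane : narrowingMask(8, 17))
    std::printf("%d ", Lane); // prints: -1 17 -1 -1 -1 -1 -1 -1
  std::printf("\n");
}
```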

commit-id:c151bb04
---
 .../Transforms/Vectorize/VectorCombine.cpp| 22 +--
 .../VectorCombine/AMDGPU/extract-insert-i8.ll | 18 ++-
 .../X86/extract-insert-poison.ll  | 12 ++
 .../VectorCombine/X86/extract-insert.ll   |  8 +++
 .../Transforms/VectorCombine/X86/pr126085.ll  |  4 ++--
 5 files changed, 22 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b83597fec021a..4b081205eba10 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -4558,22 +4558,15 @@ bool 
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   SmallVector Mask(NumDstElts, PoisonMaskElem);
 
   bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
-  bool IsExtIdxInBounds = ExtIdx < NumDstElts;
   bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec);
   if (NeedDstSrcSwap) {
 SK = TargetTransformInfo::SK_PermuteSingleSrc;
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
-  Mask[InsIdx] = 0;
-else
-  Mask[InsIdx] = ExtIdx;
+Mask[InsIdx] = ExtIdx % NumDstElts;
 std::swap(DstVec, SrcVec);
   } else {
 SK = TargetTransformInfo::SK_PermuteTwoSrc;
 std::iota(Mask.begin(), Mask.end(), 0);
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
-  Mask[InsIdx] = NumDstElts;
-else
-  Mask[InsIdx] = ExtIdx + NumDstElts;
+Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
   }
 
   // Cost
@@ -4594,14 +4587,11 @@ bool 
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   NewCost += TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind, 0,
 nullptr, {DstVec, SrcVec});
   } else {
-// When creating length-changing-vector, always create with a Mask whose
-// first element has an ExtIdx, so that the first element of the vector
-// being created is always the target to be extracted.
+// When creating a length-changing-vector, always try to keep the relevant
+// element in an equivalent position, so that bulk shuffles are more likely
+// to be useful.
 ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
-if (IsExtIdxInBounds)
-  ExtToVecMask[ExtIdx] = ExtIdx;
-else
-  ExtToVecMask[0] = ExtIdx;
+ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
 // Add cost for expanding or narrowing
 NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
  DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll 
b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
index 8c2455dd9d375..6c92892949175 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
@@ -88,22 +88,8 @@ entry:
 define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) {
 ; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening(
 ; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:[[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17
-; OPT-NEXT:[[I_2:%.*]] = extractelement <32 x i8> [[IN]], i64 18
-; OPT-NEXT:[[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19
-; OPT-NEXT:[[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21
-; OPT-NEXT:[[I_6:%.*]] = extractelement <32 x i8> [[IN]], i64 22
-; OPT-NEXT:[[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23
-; OPT-NEXT:[[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, 
<8 x i32> 
-; OPT-NEXT:[[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1
-; OPT-NEXT:[[O_2:%.*]] = insertelement <8 x i8> [[O_1]], i8 [[I_2]], i32 2
-; OPT-NEXT:[[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3
-; OPT-NEXT:[[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> 
poison, <8 x i32> 
-; OPT-NEXT:[[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8> 
[[TMP1]], <

[llvm-branch-commits] [llvm] VectorCombine: Fold chains of shuffles fed by length-changing shuffles (PR #168819)

2025-12-08 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle updated 
https://github.com/llvm/llvm-project/pull/168819

From 316715e02c8ebdc5014f5666e62738d0367c853d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= 
Date: Wed, 19 Nov 2025 17:59:11 -0800
Subject: [PATCH] VectorCombine: Fold chains of shuffles fed by length-changing
 shuffles

Such chains can arise from folding insert/extract chains.

commit-id:a960175d
---
 .../Transforms/Vectorize/VectorCombine.cpp| 192 ++
 .../VectorCombine/AMDGPU/extract-insert-i8.ll |  36 +---
 .../shuffles-of-length-changing-shuffles.ll   |  12 +-
 3 files changed, 200 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 243f685cf25e2..b83597fec021a 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -140,6 +140,7 @@ class VectorCombine {
   bool foldShuffleOfCastops(Instruction &I);
   bool foldShuffleOfShuffles(Instruction &I);
   bool foldPermuteOfIntrinsic(Instruction &I);
+  bool foldShufflesOfLengthChangingShuffles(Instruction &I);
   bool foldShuffleOfIntrinsics(Instruction &I);
   bool foldShuffleToIdentity(Instruction &I);
   bool foldShuffleFromReductions(Instruction &I);
@@ -2878,6 +2879,195 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction 
&I) {
   return true;
 }
 
+/// Try to convert a chain of length-preserving shuffles that are fed by
+/// length-changing shuffles from the same source, e.g. a chain of length 3:
+///
+///   "shuffle (shuffle (shuffle x, (shuffle y, undef)),
+/// (shuffle y, undef)),
+//  (shuffle y, undef)"
+///
+/// into a single shuffle fed by a length-changing shuffle:
+///
+///   "shuffle x, (shuffle y, undef)"
+///
+/// Such chains arise e.g. from folding extract/insert sequences.
+bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &I) {
+  FixedVectorType *TrunkType = dyn_cast(I.getType());
+  if (!TrunkType)
+return false;
+
+  unsigned ChainLength = 0;
+  SmallVector Mask;
+  SmallVector YMask;
+  InstructionCost OldCost = 0;
+  InstructionCost NewCost = 0;
+  Value *Trunk = &I;
+  unsigned NumTrunkElts = TrunkType->getNumElements();
+  Value *Y = nullptr;
+
+  for (;;) {
+// Match the current trunk against (commutations of) the pattern
+// "shuffle trunk', (shuffle y, undef)"
+ArrayRef OuterMask;
+Value *OuterV0, *OuterV1;
+if (ChainLength != 0 && !Trunk->hasOneUse())
+  break;
+if (!match(Trunk, m_Shuffle(m_Value(OuterV0), m_Value(OuterV1),
+m_Mask(OuterMask
+  break;
+if (OuterV0->getType() != TrunkType) {
+  // This shuffle is not length-preserving, so it cannot be part of the
+  // chain.
+  break;
+}
+
+ArrayRef InnerMask0, InnerMask1;
+Value *A0, *A1, *B0, *B1;
+bool Match0 =
+match(OuterV0, m_Shuffle(m_Value(A0), m_Value(B0), 
m_Mask(InnerMask0)));
+bool Match1 =
+match(OuterV1, m_Shuffle(m_Value(A1), m_Value(B1), 
m_Mask(InnerMask1)));
+bool Match0Leaf = Match0 && A0->getType() != I.getType();
+bool Match1Leaf = Match1 && A1->getType() != I.getType();
+if (Match0Leaf == Match1Leaf) {
+  // Only handle the case of exactly one leaf in each step. The "two 
leaves"
+  // case is handled by foldShuffleOfShuffles.
+  break;
+}
+
+SmallVector CommutedOuterMask;
+if (Match0Leaf) {
+  std::swap(OuterV0, OuterV1);
+  std::swap(InnerMask0, InnerMask1);
+  std::swap(A0, A1);
+  std::swap(B0, B1);
+  llvm::append_range(CommutedOuterMask, OuterMask);
+  for (int &M : CommutedOuterMask) {
+if (M == PoisonMaskElem)
+  continue;
+if (M < (int)NumTrunkElts)
+  M += NumTrunkElts;
+else
+  M -= NumTrunkElts;
+  }
+  OuterMask = CommutedOuterMask;
+}
+if (!OuterV1->hasOneUse())
+  break;
+
+if (!isa(A1)) {
+  if (!Y)
+Y = A1;
+  else if (Y != A1)
+break;
+}
+if (!isa(B1)) {
+  if (!Y)
+Y = B1;
+  else if (Y != B1)
+break;
+}
+
+auto *YType = cast(A1->getType());
+int NumLeafElts = YType->getNumElements();
+SmallVector LocalYMask(InnerMask1);
+for (int &M : LocalYMask) {
+  if (M >= NumLeafElts)
+M -= NumLeafElts;
+}
+
+InstructionCost LocalOldCost =
+TTI.getInstructionCost(cast(Trunk), CostKind) +
+TTI.getInstructionCost(cast(OuterV1), CostKind);
+
+// Handle the initial (start of chain) case.
+if (!ChainLength) {
+  Mask.assign(OuterMask);
+  YMask.assign(LocalYMask);
+  OldCost = NewCost = LocalOldCost;
+  Trunk = OuterV0;
+  ChainLength++;
+  continue;
+}
+
+// For the non-root case, first attempt to combine masks.
+SmallVector NewYMask(YMask);
+bool Valid = true;
+for (auto

[llvm-branch-commits] [NFCI][ELF][AArch64][PAC] Teach addRelativeReloc to emit R_AARCH64_AUTH_RELATIVE (PR #171180)

2025-12-08 Thread Daniil Kovalev via llvm-branch-commits


@@ -704,8 +704,10 @@ static void addRelativeReloc(Ctx &ctx, InputSectionBase 
&isec,
  uint64_t offsetInSec, Symbol &sym, int64_t addend,
  RelExpr expr, RelType type) {
   Partition &part = isec.getPartition(ctx);
+  bool isAArch64Auth =
+  ctx.arg.emachine == EM_AARCH64 && type == R_AARCH64_AUTH_ABS64;
 
-  if (sym.isTagged()) {
+  if (sym.isTagged() && !isAArch64Auth) {

kovdan01 wrote:

> but suspect that the ptrauth and memtag ABIs are currently mutually-exclusive?

@smithp35 Could you please clarify what the correct answer to the question
above is? I was only working on ptrauth and do not feel confident enough to
reason about the memtag-plus-ptrauth combination :)

https://github.com/llvm/llvm-project/pull/171180
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [TableGen] Improve error message for bad VTByHwMode in RegisterByHwMode (PR #171254)

2025-12-08 Thread Alexander Richardson via llvm-branch-commits

https://github.com/arichardson updated 
https://github.com/llvm/llvm-project/pull/171254


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [AMDGPU] Enable aliases in InstPrinter (PR #171265)

2025-12-08 Thread Alexander Richardson via llvm-branch-commits

https://github.com/arichardson created 
https://github.com/llvm/llvm-project/pull/171265

After the latest change to tablegen, we now handle `RegClassByHwMode`
correctly in the alias pattern output of -gen-asm-writer, so we can
enable it for AMDGPU. Previously, `#define PRINT_ALIAS_INSTR` caused
compilation failures due to tablegen referencing *RegClassID variables
that do not exist for `RegClassByHwMode`.

This causes a large number of test failures (380), so I just put this
up as a draft pull request to see if it is desirable. It looks like the
one test I updated uses wrong mnemonics, so it probably needs more work.



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [AMDGPU] Enable aliases in InstPrinter (PR #171265)

2025-12-08 Thread Alexander Richardson via llvm-branch-commits

arichardson wrote:

Depends on https://github.com/llvm/llvm-project/pull/171264

https://github.com/llvm/llvm-project/pull/171265
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [AMDGPU] Enable aliases in InstPrinter (PR #171265)

2025-12-08 Thread Alexander Richardson via llvm-branch-commits

https://github.com/arichardson converted_to_draft 
https://github.com/llvm/llvm-project/pull/171265
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [AMDGPU] Enable aliases in InstPrinter (PR #171265)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Alexander Richardson (arichardson)


Changes

After the latest change to tablegen, we now handle `RegClassByHwMode`
correctly in the alias pattern output of -gen-asm-writer, so we can
enable it for AMDGPU. Previously, `#define PRINT_ALIAS_INSTR` caused
compilation failures due to tablegen referencing *RegClassID variables
that do not exist for `RegClassByHwMode`.

This causes a large number of test failures (380), so I just put this
up as a draft pull request to see if it is desirable. It looks like the
one test I updated uses wrong mnemonics, so it probably needs more work.


---
Full diff: https://github.com/llvm/llvm-project/pull/171265.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3-1) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h (+5) 
- (modified) llvm/test/CodeGen/AMDGPU/max.ll (+4-4) 


```diff
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index b63d71dc2fde9..35a61616b0f8c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -44,7 +44,8 @@ void AMDGPUInstPrinter::printRegName(raw_ostream &OS, 
MCRegister Reg) {
 void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
   StringRef Annot, const MCSubtargetInfo &STI,
   raw_ostream &OS) {
-  printInstruction(MI, Address, STI, OS);
+  if (!PrintAliases || !printAliasInstr(MI, Address, STI, OS))
+printInstruction(MI, Address, STI, OS);
   printAnnotation(OS, Annot);
 }
 
@@ -1944,4 +1945,5 @@ void AMDGPUInstPrinter::printScaleSel(const MCInst *MI, 
unsigned OpNo,
   O << " scale_sel:" << formatDec(Imm);
 }
 
+#define PRINT_ALIAS_INSTR
 #include "AMDGPUGenAsmWriter.inc"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h 
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 564d6eea52328..b851615280f89 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -39,6 +39,11 @@ class AMDGPUInstPrinter : public MCInstPrinter {
raw_ostream &O, const MCRegisterInfo &MRI);
 
 private:
+  bool printAliasInstr(const MCInst *MI, uint64_t Address,
+   const MCSubtargetInfo &STI, raw_ostream &OS);
+  void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+   unsigned OpIdx, unsigned PrintMethodIdx,
+   const MCSubtargetInfo &STI, raw_ostream &OS);
   void printU16ImmOperand(const MCInst *MI, unsigned OpNo,
   const MCSubtargetInfo &STI, raw_ostream &O);
   void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/llvm/test/CodeGen/AMDGPU/max.ll b/llvm/test/CodeGen/AMDGPU/max.ll
index ac6dd30283554..e839b3dc1a916 100644
--- a/llvm/test/CodeGen/AMDGPU/max.ll
+++ b/llvm/test/CodeGen/AMDGPU/max.ll
@@ -1120,7 +1120,7 @@ define amdgpu_kernel void @test_umax_ugt_i64(ptr 
addrspace(1) %out, i64 %a, i64
 ; SI-NEXT:s_mov_b32 s4, s0
 ; SI-NEXT:v_mov_b32_e32 v0, s8
 ; SI-NEXT:v_mov_b32_e32 v1, s9
-; SI-NEXT:v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; SI-NEXT:V_CMP_GT_U64 vcc, s[2:3], v[0:1]
 ; SI-NEXT:s_mov_b32 s5, s1
 ; SI-NEXT:s_and_b64 s[0:1], vcc, exec
 ; SI-NEXT:s_cselect_b32 s0, s3, s9
@@ -1174,7 +1174,7 @@ define amdgpu_kernel void @test_umax_uge_i64(ptr 
addrspace(1) %out, i64 %a, i64
 ; SI-NEXT:s_mov_b32 s4, s0
 ; SI-NEXT:v_mov_b32_e32 v0, s8
 ; SI-NEXT:v_mov_b32_e32 v1, s9
-; SI-NEXT:v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; SI-NEXT:V_CMP_GE_U64 vcc, s[2:3], v[0:1]
 ; SI-NEXT:s_mov_b32 s5, s1
 ; SI-NEXT:s_and_b64 s[0:1], vcc, exec
 ; SI-NEXT:s_cselect_b32 s0, s3, s9
@@ -1228,7 +1228,7 @@ define amdgpu_kernel void @test_imax_sgt_i64(ptr 
addrspace(1) %out, i64 %a, i64
 ; SI-NEXT:s_mov_b32 s4, s0
 ; SI-NEXT:v_mov_b32_e32 v0, s8
 ; SI-NEXT:v_mov_b32_e32 v1, s9
-; SI-NEXT:v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
+; SI-NEXT:V_CMP_GT_I64 vcc, s[2:3], v[0:1]
 ; SI-NEXT:s_mov_b32 s5, s1
 ; SI-NEXT:s_and_b64 s[0:1], vcc, exec
 ; SI-NEXT:s_cselect_b32 s0, s3, s9
@@ -1282,7 +1282,7 @@ define amdgpu_kernel void @test_imax_sge_i64(ptr 
addrspace(1) %out, i64 %a, i64
 ; SI-NEXT:s_mov_b32 s4, s0
 ; SI-NEXT:v_mov_b32_e32 v0, s8
 ; SI-NEXT:v_mov_b32_e32 v1, s9
-; SI-NEXT:v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; SI-NEXT:V_CMP_GE_I64 vcc, s[2:3], v[0:1]
 ; SI-NEXT:s_mov_b32 s5, s1
 ; SI-NEXT:s_and_b64 s[0:1], vcc, exec
 ; SI-NEXT:s_cselect_b32 s0, s3, s9

```




https://github.com/llvm/llvm-project/pull/171265
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [mlir] [mlir][py] partially use mlir_type_subclass for IRTypes.cpp (PR #171143)

2025-12-08 Thread Maksim Levental via llvm-branch-commits

https://github.com/makslevental requested changes to this pull request.

This isn't a hard block - just a request for discussion.

> This is part of a longer-term cleanup to only support one subclassing 
> mechanism.

I had the same idea, but I think we should go in the opposite direction:
remove all of the `mlir_*_subclass`es and unify on `PyConcrete*`. I started
that in https://github.com/llvm/llvm-project/pull/156575 but de-prioritized
it. If this unification is a priority, I can finish that PR this week.

I don't think `mlir_*_subclass` is the way to go - the classes it produces
aren't "real" in the sense that you give up everything useful about nanobind
by using them, i.e., all of the convenience `def*` methods, type signature
generation, etc.

Why do you think `mlir_type_subclass` should be the target instead of
`PyConcreteType`?

https://github.com/llvm/llvm-project/pull/171143
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)

2025-12-08 Thread Gergely Bálint via llvm-branch-commits

https://github.com/bgergely0 created 
https://github.com/llvm/llvm-project/pull/171149

This patch adds BTI landing pads to ShortJmp/LongJmp targets in the
LongJmp pass when optimizing BTI binaries.

BOLT does not have the ability to add BTI to all types of functions.
This patch aims to insert the landing pad where possible and to emit an
error where that is currently not possible.

BOLT cannot insert BTIs into several function "types", including:
- ignored functions,
- PLT functions,
- other functions without a CFG.

Additional context:

In #161206, BOLT gained the ability to decode the .note.gnu.property
section and warn about BOLT's lack of BTI support. However, this
warning is misleading: the emitted binary may not need extra BTI landing
pads.

With this patch, the emitted binary will be "BTI-safe".
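
For orientation, a condensed sketch of the insertion policy described above, using simplified stand-ins rather than BOLT's real classes:

```cpp
// Simplified stand-ins, not BOLT's real types; sketches only the
// "can we add a BTI landing pad here?" decision described above.
#include <cstdio>

struct BinaryFunction {
  bool Ignored = false; // e.g. functions BOLT was told to skip
  bool HasCFG = true;   // false for PLT entries and other non-CFG code
};

// Returns true when a `bti` landing pad can be placed at the stub target.
bool canAddBTILandingPad(const BinaryFunction *Target, bool HasTargetBB) {
  if (!Target && !HasTargetBB)
    return false; // unknown target symbol: refuse to optimize
  if (Target && Target->Ignored)
    return false; // BOLT does not rewrite ignored functions
  // Functions with a CFG get a landing pad in the target block; for
  // no-CFG functions the patch edits the first instruction instead.
  return true;
}

int main() {
  BinaryFunction Ignored{/*Ignored=*/true, /*HasCFG=*/true};
  if (!canAddBTILandingPad(&Ignored, /*HasTargetBB=*/true))
    std::puts("BOLT-ERROR: cannot add BTI landing pad");
}
```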

From b125532e23a8962b3ade994ed23b8bfcb250669e Mon Sep 17 00:00:00 2001
From: Gergely Balint 
Date: Wed, 3 Sep 2025 13:29:39 +
Subject: [PATCH] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize
 binary

This patch adds BTI landing pads to ShortJmp/LongJmp targets in the
LongJmp pass when optimizing BTI binaries.

BOLT does not have the ability to add BTI to all types of functions.
This patch aims to insert the landing pad where possible, and emit an
error where it currently is not.

BOLT cannot insert BTIs into several function "types", including:
- ignored functions,
- PLT functions,
- other functions without a CFG.

Additional context:

In #161206, BOLT gained the ability to decode the .note.gnu.property
section, and warn about lack of BTI support for BOLT. However, this
warning is misleading: the emitted binary may not need extra BTI landing
pads.

With this patch, the emitted binary will be "BTI-safe".
---
 bolt/include/bolt/Core/BinaryBasicBlock.h |  2 +
 bolt/lib/Passes/LongJmp.cpp   | 53 +--
 bolt/lib/Rewrite/GNUPropertyRewriter.cpp  |  3 +-
 bolt/test/AArch64/bti-note.test   |  4 +-
 bolt/test/AArch64/long-jmp-bti-ignored.s  | 35 +++
 bolt/test/AArch64/long-jmp-bti.s  | 46 
 bolt/test/AArch64/no-bti-note.test|  4 +-
 7 files changed, 138 insertions(+), 9 deletions(-)
 create mode 100644 bolt/test/AArch64/long-jmp-bti-ignored.s
 create mode 100644 bolt/test/AArch64/long-jmp-bti.s

diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h 
b/bolt/include/bolt/Core/BinaryBasicBlock.h
index 629f0ce8314dc..2be30c14bf90b 100644
--- a/bolt/include/bolt/Core/BinaryBasicBlock.h
+++ b/bolt/include/bolt/Core/BinaryBasicBlock.h
@@ -890,6 +890,8 @@ class BinaryBasicBlock {
   /// Needed by graph traits.
   BinaryFunction *getParent() const { return getFunction(); }
 
+  bool hasParent() const { return getFunction() != nullptr; }
+
   /// Return true if the containing function is in CFG state.
   bool hasCFG() const;
 
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 03c1ea9d837e2..f0ef135df06d8 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -469,8 +469,8 @@ uint64_t LongJmpPass::getSymbolAddress(const BinaryContext 
&BC,
 }
 
 Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
-  const BinaryFunction &Func = *StubBB.getFunction();
-  const BinaryContext &BC = Func.getBinaryContext();
+  BinaryFunction &Func = *StubBB.getFunction();
+  BinaryContext &BC = Func.getBinaryContext();
   const int Bits = StubBits[&StubBB];
   // Already working with the largest range?
   if (Bits == static_cast(BC.AsmInfo->getCodePointerSize() * 8))
@@ -483,11 +483,54 @@ Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, 
bool &Modified) {
   ~((1ULL << (RangeSingleInstr - 1)) - 1);
 
   const MCSymbol *RealTargetSym = BC.MIB->getTargetSymbol(*StubBB.begin());
-  const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym);
+  BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym);
+  BinaryFunction *TargetFunction = BC.getFunctionForSymbol(RealTargetSym);
   uint64_t TgtAddress = getSymbolAddress(BC, RealTargetSym, TgtBB);
   uint64_t DotAddress = BBAddresses[&StubBB];
   uint64_t PCRelTgtAddress = DotAddress > TgtAddress ? DotAddress - TgtAddress
  : TgtAddress - DotAddress;
+
+  auto applyBTIFixup = [&](BinaryFunction *TargetFunction,
+   BinaryBasicBlock *RealTgtBB) {
+// TODO: add support for editing each type, and remove errors.
+if (!TargetFunction && !RealTgtBB) {
+  BC.errs() << "BOLT-ERROR: Cannot add BTI to function with symbol "
+<< RealTargetSym->getName() << "\n";
+  exit(1);
+}
+if (TargetFunction && TargetFunction->isIgnored()) {
+  BC.errs() << "BOLT-ERROR: Cannot add BTI landing pad to ignored function 
"
+<< TargetFunction->getPrintName() << "\n";
+  exit(1);
+}
+if (TargetFunction && !TargetFunction->hasCFG()) {
+  auto FirstII = TargetFunction->instrs().begin();

[llvm-branch-commits] [llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)

2025-12-08 Thread Gergely Bálint via llvm-branch-commits

bgergely0 wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite (https://app.graphite.com/github/pr/llvm/llvm-project/171149).
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#171149** 👈 (view in Graphite:
  https://app.graphite.com/github/pr/llvm/llvm-project/171149)
* **#167329**
* **#167308**
* **#167306**
* **#167305**
* `main`

This stack of pull requests is managed by Graphite
(https://graphite.dev). Learn more about stacking: https://stacking.dev/


https://github.com/llvm/llvm-project/pull/171149
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ADT] Make use of subsetOf and anyCommon methods of BitVector (NFC) (PR #170876)

2025-12-08 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/170876

>From fb3eff2ed2cef9f334703988105ae68b8c25be12 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Fri, 5 Dec 2025 16:08:28 +0300
Subject: [PATCH] [ADT] Make use of subsetOf and anyCommon methods of BitVector
 (NFC)

Replace the code along these lines

BitVector Tmp = LHS;
Tmp &= RHS;
return Tmp.any();

and

BitVector Tmp = LHS;
Tmp.reset(RHS);
return Tmp.none();

with `LHS.anyCommon(RHS)` and `LHS.subsetOf(RHS)`, respectively, which
do not require creating a temporary BitVector and can return early.
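
A minimal sketch of the equivalences being applied, assuming a tree where `BitVector` already provides `anyCommon()` and `subsetOf()` as this patch expects:

```cpp
// Minimal sketch; assumes llvm/ADT/BitVector.h with anyCommon() and
// subsetOf() available, as this patch relies on.
#include "llvm/ADT/BitVector.h"
#include <cassert>

int main() {
  llvm::BitVector LHS(8), RHS(8);
  LHS.set(1);
  LHS.set(3);
  RHS.set(3);
  RHS.set(5);

  // Old pattern: copy, intersect, test -- allocates a temporary.
  llvm::BitVector Tmp = LHS;
  Tmp &= RHS;
  assert(Tmp.any() == LHS.anyCommon(RHS)); // bit 3 is common: both true

  // Old pattern: copy, clear RHS's bits, test for emptiness.
  llvm::BitVector Tmp2 = LHS;
  Tmp2.reset(RHS);
  assert(Tmp2.none() == LHS.subsetOf(RHS)); // bit 1 not in RHS: both false
  return 0;
}
```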
---
 bolt/include/bolt/Passes/LivenessAnalysis.h   |  5 ++---
 bolt/include/bolt/Passes/ReachingDefOrUse.h   |  3 +--
 bolt/lib/Passes/RegReAssign.cpp   |  8 ++--
 bolt/lib/Passes/ShrinkWrapping.cpp| 10 --
 bolt/lib/Passes/StackAvailableExpressions.cpp |  3 +--
 bolt/lib/Passes/TailDuplication.cpp   |  8 
 llvm/lib/CodeGen/RDFRegisters.cpp |  6 ++
 llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp |  6 ++
 8 files changed, 18 insertions(+), 31 deletions(-)

diff --git a/bolt/include/bolt/Passes/LivenessAnalysis.h 
b/bolt/include/bolt/Passes/LivenessAnalysis.h
index 1df1113318d0b..f4faa1dc34ecd 100644
--- a/bolt/include/bolt/Passes/LivenessAnalysis.h
+++ b/bolt/include/bolt/Passes/LivenessAnalysis.h
@@ -37,10 +37,9 @@ class LivenessAnalysis : public 
DataflowAnalysisgetStateAt(PP));
+const BitVector &BV = *this->getStateAt(PP);
 const BitVector &RegAliases = BC.MIB->getAliases(Reg);
-BV &= RegAliases;
-return BV.any();
+return BV.anyCommon(RegAliases);
   }
 
   void run() { Parent::run(); }
diff --git a/bolt/include/bolt/Passes/ReachingDefOrUse.h 
b/bolt/include/bolt/Passes/ReachingDefOrUse.h
index 585d673e3b84e..41a6091aad4cb 100644
--- a/bolt/include/bolt/Passes/ReachingDefOrUse.h
+++ b/bolt/include/bolt/Passes/ReachingDefOrUse.h
@@ -133,8 +133,7 @@ class ReachingDefOrUse
   RA.getInstClobberList(Point, Regs);
 else
   RA.getInstUsedRegsList(Point, Regs, false);
-Regs &= this->BC.MIB->getAliases(*TrackingReg);
-if (Regs.any())
+if (Regs.anyCommon(this->BC.MIB->getAliases(*TrackingReg)))
   Next.set(this->ExprToIdx[&Point]);
   }
 }
diff --git a/bolt/lib/Passes/RegReAssign.cpp b/bolt/lib/Passes/RegReAssign.cpp
index 0859cd244ce40..54eff51bfee68 100644
--- a/bolt/lib/Passes/RegReAssign.cpp
+++ b/bolt/lib/Passes/RegReAssign.cpp
@@ -316,18 +316,14 @@ void 
RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) {
   break;
 }
 
-BitVector AnyAliasAlive = AliveAtStart;
-AnyAliasAlive &= BC.MIB->getAliases(ClassicReg);
-if (AnyAliasAlive.any()) {
+if (AliveAtStart.anyCommon(BC.MIB->getAliases(ClassicReg))) {
   LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
 << " with " << BC.MRI->getName(ExtReg)
 << " because classic reg is alive\n");
   --End;
   continue;
 }
-AnyAliasAlive = AliveAtStart;
-AnyAliasAlive &= BC.MIB->getAliases(ExtReg);
-if (AnyAliasAlive.any()) {
+if (AliveAtStart.anyCommon(BC.MIB->getAliases(ExtReg))) {
   LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
 << " with " << BC.MRI->getName(ExtReg)
 << " because extended reg is alive\n");
diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp 
b/bolt/lib/Passes/ShrinkWrapping.cpp
index fe342ccd38a67..b882e2512866d 100644
--- a/bolt/lib/Passes/ShrinkWrapping.cpp
+++ b/bolt/lib/Passes/ShrinkWrapping.cpp
@@ -1100,9 +1100,8 @@ SmallVector 
ShrinkWrapping::fixPopsPlacements(
 bool Found = false;
 if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
 SaveOffset) {
-  BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
-  BV &= UsesByReg[CSR];
-  if (!BV.any()) {
+  const BitVector &BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
+  if (!BV.anyCommon(UsesByReg[CSR])) {
 Found = true;
 PP = BB;
 continue;
@@ -1110,9 +1109,8 @@ SmallVector 
ShrinkWrapping::fixPopsPlacements(
 }
 for (MCInst &Inst : llvm::reverse(*BB)) {
   if (SPT.getStateBefore(Inst)->first == SaveOffset) {
-BitVector BV = *RI.getStateAt(Inst);
-BV &= UsesByReg[CSR];
-if (!BV.any()) {
+const BitVector &BV = *RI.getStateAt(Inst);
+if (!BV.anyCommon(UsesByReg[CSR])) {
   Found = true;
   PP = &Inst;
   break;
diff --git a/bolt/lib/Passes/StackAvailableExpressions.cpp 
b/bolt/lib/Passes/StackAvailableExpressions.cpp
index a0d361f273de2..c685cc19badc3 100644
--- a/bolt/lib/Passes/StackAvailableExpressions.cpp
+++ b/bolt/lib/Passes/StackAvailableExpressions.cpp
@@ -103,8 +103,7 @@ bool StackAvailableExpressions::doesXKillsY(const MCInst 
*X

[llvm-branch-commits] [flang] [flang][OpenMP] Generalize checks of loop construct structure (PR #170735)

2025-12-08 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/170735

>From 9a2d3dca08ab237e7e949fd5642c96cf0fba89b8 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Tue, 2 Dec 2025 14:59:34 -0600
Subject: [PATCH 1/3] [flang][OpenMP] Generalize checks of loop construct
 structure

For an OpenMP loop construct, count how many loops will effectively be
contained in its associated block. For constructs that are loop-nest
associated, this number should be 1. Report cases where this number
differs.

Take into account that the block associated with a loop construct can
contain compiler directives.
---
 flang/lib/Semantics/check-omp-loop.cpp| 201 +++---
 flang/lib/Semantics/check-omp-structure.h |   3 +-
 flang/test/Parser/OpenMP/tile-fail.f90|   8 +-
 flang/test/Semantics/OpenMP/do21.f90  |  10 +-
 .../Semantics/OpenMP/loop-association.f90 |   6 +-
 .../OpenMP/loop-transformation-clauses01.f90  |  16 +-
 .../loop-transformation-construct01.f90   |   4 +-
 .../loop-transformation-construct02.f90   |   8 +-
 .../loop-transformation-construct04.f90   |   4 +-
 9 files changed, 156 insertions(+), 104 deletions(-)

diff --git a/flang/lib/Semantics/check-omp-loop.cpp 
b/flang/lib/Semantics/check-omp-loop.cpp
index fc4b9222d91b3..6414f0028e008 100644
--- a/flang/lib/Semantics/check-omp-loop.cpp
+++ b/flang/lib/Semantics/check-omp-loop.cpp
@@ -37,6 +37,14 @@
 #include 
 #include 
 
+namespace Fortran::semantics {
+static bool IsLoopTransforming(llvm::omp::Directive dir);
+static bool IsFullUnroll(const parser::OpenMPLoopConstruct &x);
+static std::optional CountGeneratedLoops(
+const parser::ExecutionPartConstruct &epc);
+static std::optional CountGeneratedLoops(const parser::Block &block);
+} // namespace Fortran::semantics
+
 namespace {
 using namespace Fortran;
 
@@ -263,22 +271,19 @@ static bool IsLoopTransforming(llvm::omp::Directive dir) {
 }
 
 void OmpStructureChecker::CheckNestedBlock(const parser::OpenMPLoopConstruct 
&x,
-const parser::Block &body, size_t &nestedCount) {
+const parser::Block &body) {
   for (auto &stmt : body) {
 if (auto *dir{parser::Unwrap(stmt)}) {
   context_.Say(dir->source,
   "Compiler directives are not allowed inside OpenMP loop 
constructs"_warn_en_US);
-} else if (parser::Unwrap(stmt)) {
-  ++nestedCount;
 } else if (auto *omp{parser::Unwrap(stmt)}) {
   if (!IsLoopTransforming(omp->BeginDir().DirName().v)) {
 context_.Say(omp->source,
 "Only loop-transforming OpenMP constructs are allowed inside 
OpenMP loop constructs"_err_en_US);
   }
-  ++nestedCount;
 } else if (auto *block{parser::Unwrap(stmt)}) {
-  CheckNestedBlock(x, std::get(block->t), nestedCount);
-} else {
+  CheckNestedBlock(x, std::get(block->t));
+} else if (!parser::Unwrap(stmt)) {
   parser::CharBlock source{parser::GetSource(stmt).value_or(x.source)};
   context_.Say(source,
   "OpenMP loop construct can only contain DO loops or 
loop-nest-generating OpenMP constructs"_err_en_US);
@@ -286,16 +291,96 @@ void OmpStructureChecker::CheckNestedBlock(const 
parser::OpenMPLoopConstruct &x,
   }
 }
 
+static bool IsFullUnroll(const parser::OpenMPLoopConstruct &x) {
+  const parser::OmpDirectiveSpecification &beginSpec{x.BeginDir()};
+
+  if (beginSpec.DirName().v == llvm::omp::Directive::OMPD_unroll) {
+return llvm::none_of(beginSpec.Clauses().v, [](const parser::OmpClause &c) 
{
+  return c.Id() == llvm::omp::Clause::OMPC_partial;
+});
+  }
+  return false;
+}
+
+static std::optional CountGeneratedLoops(
+const parser::ExecutionPartConstruct &epc) {
+  if (parser::Unwrap(epc)) {
+return 1;
+  }
+
+  auto &omp{DEREF(parser::Unwrap(epc))};
+  const parser::OmpDirectiveSpecification &beginSpec{omp.BeginDir()};
+  llvm::omp::Directive dir{beginSpec.DirName().v};
+
+  // TODO: Handle split, apply.
+  if (IsFullUnroll(omp)) {
+return std::nullopt;
+  }
+  if (dir == llvm::omp::Directive::OMPD_fuse) {
+auto rangeAt{
+llvm::find_if(beginSpec.Clauses().v, [](const parser::OmpClause &c) {
+  return c.Id() == llvm::omp::Clause::OMPC_looprange;
+})};
+if (rangeAt == beginSpec.Clauses().v.end()) {
+  return std::nullopt;
+}
+
+auto *loopRange{parser::Unwrap(*rangeAt)};
+std::optional count{GetIntValue(std::get<1>(loopRange->t))};
+if (!count || *count <= 0) {
+  return std::nullopt;
+}
+if (auto nestedCount{CountGeneratedLoops(std::get(omp.t))}) 
{
+  return 1 + *nestedCount - static_cast(*count);
+} else {
+  return std::nullopt;
+}
+  }
+
+  // For every other loop construct return 1.
+  return 1;
+}
+
+static std::optional CountGeneratedLoops(const parser::Block &block) {
+  // Count the number of loops in the associated block. If there are any
+  // malformed construct in there, getting the number may be meaningless.
+  // These issue

[llvm-branch-commits] [FlowSensitive] [StatusOr] [14/N] Support nested StatusOrs (PR #170950)

2025-12-08 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer edited 
https://github.com/llvm/llvm-project/pull/170950
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Use different name scope for MIMGBaseOpcode (PR #170904)

2025-12-08 Thread Mirko Brkušanin via llvm-branch-commits

https://github.com/mbrkusanin updated 
https://github.com/llvm/llvm-project/pull/170904

From 882c6ec5c217e4a29963ba735a7068b28d92bf96 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin 
Date: Fri, 5 Dec 2025 19:21:27 +0100
Subject: [PATCH 1/2] [AMDGPU] Use different name scope for MIMGBaseOpcode

Define MIMGBaseOpcode values with `enum class` instead of regular
`enum` so they will be in a separate name scope from regular
opcodes. These two groups of opcodes should not be mixed, and
keeping them in separate scopes reduces the chance of
introducing bugs.
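
A small illustration of the scoping this buys; the names are simplified stand-ins, not the actual AMDGPU definitions:

```cpp
// Simplified stand-ins, not the actual AMDGPU definitions; shows why
// the two opcode families can no longer be mixed by accident.
enum RegularOpcode { V_ADD_U32 = 0, V_MUL_U32 = 1 };
enum class MIMGBaseOpcode { IMAGE_LOAD = 0, IMAGE_SAMPLE = 1 };

int takesRegularOpcode(RegularOpcode Op) { return static_cast<int>(Op); }

int main() {
  // Plain enumerators convert implicitly to int, so nothing ties a
  // value to its family:
  int Loose = V_ADD_U32; // compiles silently

  // Scoped enumerators must be named through their type and do not
  // convert implicitly, so mixing is a compile-time error:
  MIMGBaseOpcode Base = MIMGBaseOpcode::IMAGE_LOAD;
  // takesRegularOpcode(Base);               // error: no implicit conversion
  // int Bad = MIMGBaseOpcode::IMAGE_SAMPLE; // error as well
  return Loose + static_cast<int>(Base);     // explicit cast is required
}
```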
---
 llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h  |  6 ++---
 .../AMDGPU/AMDGPUInstructionSelector.cpp  |  2 +-
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 19 ---
 llvm/lib/Target/AMDGPU/MIMGInstructions.td|  3 ++-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 23 ++-
 .../Target/AMDGPU/SILoadStoreOptimizer.cpp|  4 +++-
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp|  2 +-
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 16 ++---
 8 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h 
b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 529da8d28a3c1..328ef3e10f5c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -49,8 +49,8 @@ const D16ImageDimIntrinsic 
*lookupD16ImageDimIntrinsic(unsigned Intr);
 
 struct ImageDimIntrinsicInfo {
   unsigned Intr;
-  unsigned BaseOpcode;
-  unsigned AtomicNoRetBaseOpcode;
+  MIMGBaseOpcode BaseOpcode;
+  MIMGBaseOpcode AtomicNoRetBaseOpcode;
   MIMGDim Dim;
 
   uint8_t NumOffsetArgs;
@@ -85,7 +85,7 @@ struct ImageDimIntrinsicInfo {
 const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
 
 const ImageDimIntrinsicInfo *
-getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim);
+getImageDimIntrinsicByBaseOpcode(MIMGBaseOpcode BaseOpcode, unsigned Dim);
 
 } // end AMDGPU namespace
 } // End llvm namespace
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 15492144ba615..e3d4a063c7d3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2054,7 +2054,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
   MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
   MachineBasicBlock *MBB = MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
-  unsigned IntrOpcode = Intr->BaseOpcode;
+  AMDGPU::MIMGBaseOpcode IntrOpcode = Intr->BaseOpcode;
 
   // For image atomic: use no-return opcode if result is unused.
   if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index cb1a4ee6d542e..137aa7d3d18e9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -6850,8 +6850,10 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
   }
 
   const bool IsAtomicPacked16Bit =
-  (BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
-   BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
+  (BaseOpcode->BaseOpcode ==
+   AMDGPU::MIMGBaseOpcode::IMAGE_ATOMIC_PK_ADD_F16 ||
+   BaseOpcode->BaseOpcode ==
+   AMDGPU::MIMGBaseOpcode::IMAGE_ATOMIC_PK_ADD_BF16);
 
   // Check for 16 bit addresses and pack if true.
   LLT GradTy =
@@ -7476,10 +7478,11 @@ bool AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
   const bool UseNSA =
   IsGFX12Plus || (ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize());
 
-  const unsigned BaseOpcodes[2][2] = {
-  {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
-  {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
-   AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
+  const AMDGPU::MIMGBaseOpcode BaseOpcodes[2][2] = {
+  {AMDGPU::MIMGBaseOpcode::IMAGE_BVH_INTERSECT_RAY,
+   AMDGPU::MIMGBaseOpcode::IMAGE_BVH_INTERSECT_RAY_a16},
+  {AMDGPU::MIMGBaseOpcode::IMAGE_BVH64_INTERSECT_RAY,
+   AMDGPU::MIMGBaseOpcode::IMAGE_BVH64_INTERSECT_RAY_a16}};
   int Opcode;
   if (UseNSA) {
 Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
@@ -7622,8 +7625,8 @@ bool AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic(
   const unsigned NumVDataDwords = 10;
   const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
   int Opcode = AMDGPU::getMIMGOpcode(
-  IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
- : AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
+  IsBVH8 ? AMDGPU::MIMGBaseOpcode::IMAGE_BVH8_INTERSECT_RAY
+ : AMDGPU::MIMGBaseOpcode::IMAGE_BVH_DUAL_INTERSECT_RAY,
   AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
   assert(Opcode != -1);
 
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 65dce74a1e894..494c3c07ea

[llvm-branch-commits] [clang] [LifetimeSafety] Add implicit tracking for STL functions (PR #170005)

2025-12-08 Thread Yitzhak Mandelbaum via llvm-branch-commits

https://github.com/ymand approved this pull request.


https://github.com/llvm/llvm-project/pull/170005
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Add implicit tracking for STL functions (PR #170005)

2025-12-08 Thread Yitzhak Mandelbaum via llvm-branch-commits


@@ -71,6 +71,88 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) {
   return isNormalAssignmentOperator(FD);
 }
 
+// Decl::isInStdNamespace will return false for iterators in some STL
+// implementations due to them being defined in a namespace outside of the std
+// namespace.
+static bool isInStlNamespace(const Decl *D) {
+  const DeclContext *DC = D->getDeclContext();
+  if (!DC)
+return false;
+  if (const auto *ND = dyn_cast<NamespaceDecl>(DC))
+if (const IdentifierInfo *II = ND->getIdentifier()) {
+  StringRef Name = II->getName();
+  if (Name.size() >= 2 && Name.front() == '_' &&
+  (Name[1] == '_' || isUppercase(Name[1])))
+return true;
+}
+
+  return DC->isStdNamespace();
+}
+
+static bool isPointerLikeType(QualType QT) {
+  return isGslPointerType(QT) || QT->isPointerType() || QT->isNullPtrType();
+}
+
+bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
+  if (auto *Conv = dyn_cast_or_null<CXXConversionDecl>(Callee))
+if (isGslPointerType(Conv->getConversionType()) &&
+Callee->getParent()->hasAttr<OwnerAttr>())
+  return true;
+  if (!isInStlNamespace(Callee->getParent()))
+return false;
+  if (!isGslPointerType(Callee->getFunctionObjectParameterType()) &&
+  !isGslOwnerType(Callee->getFunctionObjectParameterType()))
+return false;
+  if (isPointerLikeType(Callee->getReturnType())) {
+if (!Callee->getIdentifier())
+  return false;
+return llvm::StringSwitch<bool>(Callee->getName())
+.Cases({"begin", "rbegin", "cbegin", "crbegin"}, true)
+.Cases({"end", "rend", "cend", "crend"}, true)
+.Cases({"c_str", "data", "get"}, true)

ymand wrote:

why split into three separate `Cases`?

https://github.com/llvm/llvm-project/pull/170005
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [LifetimeSafety] Add implicit tracking for STL functions (PR #170005)

2025-12-08 Thread Yitzhak Mandelbaum via llvm-branch-commits


@@ -71,6 +71,88 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) {
   return isNormalAssignmentOperator(FD);
 }
 
+// Decl::isInStdNamespace will return false for iterators in some STL

ymand wrote:

nit: maybe preface with "This function is needed because "?

https://github.com/llvm/llvm-project/pull/170005
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Use different name scope for MIMGEncoding and MIMGDim (PR #171166)

2025-12-08 Thread via llvm-branch-commits

llvmbot wrote:

@llvm/pr-subscribers-backend-amdgpu

Author: Mirko Brkušanin (mbrkusanin)


Changes

Use new scoped enums with type set to uint8_t.
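
A short sketch of the shape of the change (enumerator lists abbreviated;
the real definitions are in the diff below):

```cpp
#include <cstdint>
#include <type_traits>

// Scoped enums with a fixed std::uint8_t underlying type: enumerators
// need explicit qualification and each value stays one byte wide.
enum class MIMGEncoding : std::uint8_t { MIMGEncGfx6, MIMGEncGfx8, MIMGEncGfx12 };
enum class MIMGDim : std::uint8_t { Dim1D, Dim2D, Dim3D };

static_assert(sizeof(MIMGEncoding) == 1, "one byte per encoding field");
static_assert(std::is_same_v<std::underlying_type_t<MIMGDim>, std::uint8_t>);
```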

---

Patch is 22.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171166.diff


12 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+22-16) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+12-10) 
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+5-5) 
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp (+3-3) 
- (modified) llvm/lib/Target/AMDGPU/MIMGInstructions.td (+18-16) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+34-26) 
- (modified) llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp (+6-6) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+4-4) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+3-3) 


```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 328ef3e10f5c9..480bdb4b1d04f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -85,7 +85,7 @@ struct ImageDimIntrinsicInfo {
 const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
 
 const ImageDimIntrinsicInfo *
-getImageDimIntrinsicByBaseOpcode(MIMGBaseOpcode BaseOpcode, unsigned Dim);
+getImageDimIntrinsicByBaseOpcode(MIMGBaseOpcode BaseOpcode, MIMGDim Dim);
 
 } // end AMDGPU namespace
 } // End llvm namespace
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index e3d4a063c7d3a..6bd8e54eac2ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2196,22 +2196,26 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
 
   int Opcode = -1;
   if (IsGFX12Plus) {
-Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
-   NumVDataDwords, NumVAddrDwords);
+Opcode =
+AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncoding::MIMGEncGfx12,
+  NumVDataDwords, NumVAddrDwords);
   } else if (IsGFX11Plus) {
-Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
-   UseNSA ? AMDGPU::MIMGEncGfx11NSA
-  : AMDGPU::MIMGEncGfx11Default,
-   NumVDataDwords, NumVAddrDwords);
+Opcode = AMDGPU::getMIMGOpcode(
+IntrOpcode,
+UseNSA ? AMDGPU::MIMGEncoding::MIMGEncGfx11NSA
+   : AMDGPU::MIMGEncoding::MIMGEncGfx11Default,
+NumVDataDwords, NumVAddrDwords);
   } else if (IsGFX10Plus) {
-Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
-   UseNSA ? AMDGPU::MIMGEncGfx10NSA
-  : AMDGPU::MIMGEncGfx10Default,
-   NumVDataDwords, NumVAddrDwords);
+Opcode = AMDGPU::getMIMGOpcode(
+IntrOpcode,
+UseNSA ? AMDGPU::MIMGEncoding::MIMGEncGfx10NSA
+   : AMDGPU::MIMGEncoding::MIMGEncGfx10Default,
+NumVDataDwords, NumVAddrDwords);
   } else {
 if (Subtarget->hasGFX90AInsts()) {
-  Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
- NumVDataDwords, NumVAddrDwords);
+  Opcode =
+  AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncoding::MIMGEncGfx90a,
+NumVDataDwords, NumVAddrDwords);
   if (Opcode == -1) {
 LLVM_DEBUG(
 dbgs()
@@ -2221,11 +2225,13 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
 }
 if (Opcode == -1 &&
 STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
-  Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
- NumVDataDwords, NumVAddrDwords);
+  Opcode =
+  AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncoding::MIMGEncGfx8,
+NumVDataDwords, NumVAddrDwords);
 if (Opcode == -1)
-  Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
- NumVDataDwords, NumVAddrDwords);
+  Opcode =
+  AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncoding::MIMGEncGfx6,
+NumVDataDwords, NumVAddrDwords);
   }
   if (Opcode == -1)
 return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 137aa7d3d18e9..9f886b8f4c155 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7485,17 +748
