[llvm-branch-commits] AArch64: Emit PAuth ifuncs into the same comdat as the containing global. (PR #170944)
https://github.com/fmayer approved this pull request. https://github.com/llvm/llvm-project/pull/170944 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-tidy] add abseil-unchecked-statusor-access (PR #171188)
https://github.com/fmayer updated
https://github.com/llvm/llvm-project/pull/171188
>From 3a7705624359678edaed5c7b9686cae034cb4bfd Mon Sep 17 00:00:00 2001
From: Florian Mayer
Date: Mon, 8 Dec 2025 13:10:30 -0800
Subject: [PATCH 1/2] change
Created using spr 1.3.7
---
 .../clang-tidy/abseil/UncheckedStatusOrAccessCheck.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang-tools-extra/clang-tidy/abseil/UncheckedStatusOrAccessCheck.h b/clang-tools-extra/clang-tidy/abseil/UncheckedStatusOrAccessCheck.h
index cf47703f0a972..8fefee4691be6 100644
--- a/clang-tools-extra/clang-tidy/abseil/UncheckedStatusOrAccessCheck.h
+++ b/clang-tools-extra/clang-tidy/abseil/UncheckedStatusOrAccessCheck.h
@@ -10,7 +10,7 @@ namespace clang::tidy::abseil {
// assuring that it contains a value.
//
// For details on the dataflow analysis implemented in this check see:
-// http://google3/devtools/cymbal/nullability/statusor
+// clang/lib/Analysis/FlowSensitive/Models/UncheckedStatusOrAccessModel.cpp
class UncheckedStatusOrAccessCheck : public ClangTidyCheck {
public:
using ClangTidyCheck::ClangTidyCheck;
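For orientation, here is a minimal illustration of the pattern this check is meant to flag, written from the description above (hypothetical code, not taken from the PR's test files):
```
#include "absl/status/statusor.h"

// Hypothetical API; any function returning absl::StatusOr<T> would do.
absl::StatusOr<int> GetValue();

int UseUnchecked() {
  absl::StatusOr<int> v = GetValue();
  return *v;  // would be flagged: dereferenced without assuring v.ok()
}

int UseChecked() {
  absl::StatusOr<int> v = GetValue();
  if (!v.ok())
    return 0;
  return *v;  // fine: the access is guarded by an ok() check
}
```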
>From d020c4b77da52906b21b382650e664439c3aaa65 Mon Sep 17 00:00:00 2001
From: Florian Mayer
Date: Mon, 8 Dec 2025 17:49:15 -0800
Subject: [PATCH 2/2] test
Created using spr 1.3.7
---
 .../abseil/Inputs/absl/meta/type_traits.h | 46 ++
 .../abseil/Inputs/absl/status/status.h | 69 +++
 .../abseil/Inputs/absl/status/statusor.h | 346 ++
 .../checkers/abseil/Inputs/cstddef.h | 10 +
 .../checkers/abseil/Inputs/initializer_list | 11 +
 .../checkers/abseil/Inputs/type_traits | 427 ++
 .../abseil-unchecked-statusor-access.cpp | 138 ++
 7 files changed, 1047 insertions(+)
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/meta/type_traits.h
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/status/status.h
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/status/statusor.h
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/cstddef.h
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/initializer_list
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/type_traits
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/abseil/abseil-unchecked-statusor-access.cpp
diff --git a/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/meta/type_traits.h b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/meta/type_traits.h
new file mode 100644
index 0..06ce61dbcc1e7
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/meta/type_traits.h
@@ -0,0 +1,46 @@
+#include <type_traits>
+
+namespace absl {
+
+template <typename... Ts>
+struct conjunction : std::true_type {};
+
+template <typename T, typename... Ts>
+struct conjunction<T, Ts...>
+    : std::conditional<T::value, conjunction<Ts...>, T>::type {};
+
+template <typename T>
+struct conjunction<T> : T {};
+
+template <typename... Ts>
+struct disjunction : std::false_type {};
+
+template <typename T, typename... Ts>
+struct disjunction<T, Ts...>
+    : std::conditional<T::value, T, disjunction<Ts...>>::type {};
+
+template <typename T>
+struct disjunction<T> : T {};
+
+template <typename T>
+struct negation : std::integral_constant<bool, !T::value> {};
+
+template <bool B, typename T = void>
+using enable_if_t = typename std::enable_if<B, T>::type;
+
+
+template <bool B, typename T, typename F>
+using conditional_t = typename std::conditional<B, T, F>::type;
+
+template <typename T>
+using remove_cv_t = typename std::remove_cv<T>::type;
+
+template <typename T>
+using remove_reference_t = typename std::remove_reference<T>::type;
+
+template <typename T>
+using decay_t = typename std::decay<T>::type;
+
+using std::in_place;
+using std::in_place_t;
+} // namespace absl
diff --git a/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/status/status.h b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/status/status.h
new file mode 100644
index 0..fd0910e81436a
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/abseil/Inputs/absl/status/status.h
@@ -0,0 +1,69 @@
+namespace absl {
+struct SourceLocation {
+ static constexpr SourceLocation current();
+ static constexpr SourceLocation
+ DoNotInvokeDirectlyNoSeriouslyDont(int line, const char *file_name);
+};
+} // namespace absl
+namespace absl {
+enum class StatusCode : int {
+ kOk,
+ kCancelled,
+ kUnknown,
+ kInvalidArgument,
+ kDeadlineExceeded,
+ kNotFound,
+ kAlreadyExists,
+ kPermissionDenied,
+ kResourceExhausted,
+ kFailedPrecondition,
+ kAborted,
+ kOutOfRange,
+ kUnimplemented,
+ kInternal,
+ kUnavailable,
+ kDataLoss,
+ kUnauthenticated,
+};
+} // namespace absl
+
+namespace absl {
+enum class StatusToStringMode : int {
+ kWithNoExtraData = 0,
+ kWithPayload = 1 << 0,
+ kWithSourceLocation = 1 << 1,
+ kWithEverything = ~kWithNoExtraData,
+ kDefault = kWithPayload,
+};
+class Status {
+public:
+ Status();
+ Status(const Status &base_status, absl::SourceLocation loc);
+ Status(Status &&base_status, absl::SourceLocation loc);
+ ~Status() {}
+
+ Status(const St
[llvm-branch-commits] [clang-tools-extra] [clang-tidy] add abseil-unchecked-statusor-access (PR #171188)
https://github.com/fmayer edited https://github.com/llvm/llvm-project/pull/171188 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang-tools-extra] [clang-tidy] add abseil-unchecked-statusor-access (PR #171188)
https://github.com/fmayer ready_for_review https://github.com/llvm/llvm-project/pull/171188 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)
weliveindetail wrote: That's fair https://github.com/llvm/llvm-project/pull/170846 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -amdgpu-enable-machine-level-inliner < %s | FileCheck %s
+
+declare !callback !0 i32 @llvm.amdgcn.call.whole.wave.i32.p0(ptr, ...)
+
+define amdgpu_cs void @inline_simple_wwf(i32 %input, ptr addrspace(1) %output) {
+; CHECK-LABEL: inline_simple_wwf:
+; CHECK: ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s1, simple_whole_wave_func@abs32@hi
+; CHECK-NEXT:    s_mov_b32 s0, simple_whole_wave_func@abs32@lo
+; CHECK-NEXT:    s_mov_b32 s32, 0
+; CHECK-NEXT:    v_dual_mov_b32 v41, v2 :: v_dual_mov_b32 v40, v1
+; CHECK-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT:    global_store_b32 v[40:41], v0, off
+; CHECK-NEXT:    s_endpgm
+  %result = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @simple_whole_wave_func, i32 %input)
+  store i32 %result, ptr addrspace(1) %output
+  ret void
+}
+
+define amdgpu_gfx_whole_wave i32 @simple_whole_wave_func(i1 %active, i32 %x) {
+  %result = add i32 %x, 42
+  ret i32 %result
+}
+
+define amdgpu_gfx_whole_wave i32 @another_whole_wave_func(i1 %active, i32 %a, i32 %b) {
+  %sum = add i32 %a, %b
+  %result = mul i32 %sum, 2
+  ret i32 %result
+}
+
+define amdgpu_cs void @inline_multiple_wwf(i32 %x, i32 %y, ptr addrspace(1) %out1, ptr addrspace(1) %out2) {
+; CHECK-LABEL: inline_multiple_wwf:
+; CHECK: ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s1, simple_whole_wave_func@abs32@hi
+; CHECK-NEXT:    s_mov_b32 s0, simple_whole_wave_func@abs32@lo
+; CHECK-NEXT:    s_mov_b32 s32, 0
+; CHECK-NEXT:    v_dual_mov_b32 v41, v5 :: v_dual_mov_b32 v44, v0
+; CHECK-NEXT:    v_dual_mov_b32 v40, v4 :: v_dual_mov_b32 v43, v3
+; CHECK-NEXT:    v_dual_mov_b32 v42, v2 :: v_dual_mov_b32 v45, v1
+; CHECK-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT:    v_dual_mov_b32 v46, v0 :: v_dual_mov_b32 v1, v45
+; CHECK-NEXT:    v_mov_b32_e32 v0, v44
+; CHECK-NEXT:    s_mov_b32 s1, another_whole_wave_func@abs32@hi
+; CHECK-NEXT:    s_mov_b32 s0, another_whole_wave_func@abs32@lo
+; CHECK-NEXT:    s_wait_alu 0xfffe
+; CHECK-NEXT:    s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT:    global_store_b32 v[42:43], v46, off
+; CHECK-NEXT:    global_store_b32 v[40:41], v0, off
+; CHECK-NEXT:    s_endpgm
+  %result1 = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @simple_whole_wave_func, i32 %x)
+  %result2 = call i32(ptr, ...) @llvm.amdgcn.call.whole.wave(ptr @another_whole_wave_func, i32 %x, i32 %y)
+  store i32 %result1, ptr addrspace(1) %out1
+  store i32 %result2, ptr addrspace(1) %out2
+  ret void
+}
+
cmc-rep wrote:
Could we also have a test that exercises one WWF called from multiple kernels? I
believe it should work, but having a test like that would still be helpful.
https://github.com/llvm/llvm-project/pull/169476
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [compiler-rt] [llvm] release/21.x: [SPARC] Remove CCIfConsecutiveRegs for f128 returns (#170133) (PR #170580)
brad0 wrote: @efriedma-quic Ping. https://github.com/llvm/llvm-project/pull/170580 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ADT] Make use of subsetOf and anyCommon methods of BitVector (NFC) (PR #170876)
https://github.com/kuhar approved this pull request. https://github.com/llvm/llvm-project/pull/170876 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)
@@ -0,0 +1,46 @@
+# This test checks that BOLT can generate BTI landing pads for targets of stubs inserted in LongJmp.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %s %cflags -Wl,-q -o %t -mbranch-protection=bti -Wl,-z,force-bti
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions \
+# RUN:   --print-split --print-only foo --print-longjmp 2>&1 | FileCheck %s
+
+#CHECK: BOLT-INFO: Starting stub-insertion pass
+#CHECK: Binary Function "foo" after long-jmp
+
+#CHECK: cmp x0, #0x0
+#CHECK-NEXT: Successors: .LStub0
+
+#CHECK: adrp x16, .Ltmp0
+#CHECK-NEXT: add x16, x16, :lo12:.Ltmp0
+#CHECK-NEXT: br x16 # UNKNOWN CONTROL FLOW
+
+#CHECK: --- HOT-COLD SPLIT POINT ---
+
+#CHECK: bti c
+#CHECK-NEXT: mov x0, #0x2
+#CHECK-NEXT: ret
peterwaller-arm wrote:
Consistency: all of the above should begin `# CHECK`.
https://github.com/llvm/llvm-project/pull/171149 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)
@@ -0,0 +1,35 @@
+# This test checks the situation where LongJmp adds a stub targeting an ignored (skipped) function.
+# The problem is that by default BOLT cannot modify ignored functions, so it cannot add the needed BTI.
+
+# Current behaviour is to emit an error.
+
+# REQUIRES: system-linux, asserts
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN:   -mattr=+bti -aarch64-mark-bti-property %s -o %t.o
+# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q -Wl,-z,force-bti
+# RUN: not llvm-bolt %t.exe -o %t.bolt \
+# RUN:   --align-text=0x1000 --skip-funcs=far_away_func 2>&1 | FileCheck %s
+
+# CHECK: BOLT-ERROR: Cannot add BTI landing pad to ignored function far_away_func
+
+  .section .text
+  .global _start
+  .global far_away_func
+
+  .align 4
+  .global _start
+  .type _start, %function
+_start:
+bti c
+bl far_away_func
+ret
+
+  .global far_away_func
peterwaller-arm wrote:
Worth a short comment to state that it is far away once bolt has run, because it is skipped, and why?
https://github.com/llvm/llvm-project/pull/171149 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)
@@ -0,0 +1,262 @@
+//===-- AMDGPUMachineLevelInliner.cpp - AMDGPU Machine Level Inliner -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMachineLevelInliner.h"
+#include "AMDGPU.h"
+#include "AMDGPUMachineModuleInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManagers.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-machine-level-inliner"
+
+namespace {
+class AMDGPUInliningPassManager : public FPPassManager {
+public:
+ static char ID;
+
+ explicit AMDGPUInliningPassManager() : FPPassManager(ID) {}
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool doFinalization(Module &M) override;
+
+ StringRef getPassName() const override {
+return "AMDGPU Inlining Pass Manager";
+ }
+};
+
+/// AMDGPUInliningAnchor - A machine function pass that serves as an anchor for
+/// setting up the AMDGPU inlining pass manager infrastructure. It makes sure
+/// the inliner is run via an AMDGPUInliningPassManager. It can be run well in
+/// advance of the inliner as long as there are only FunctionPasses in between.
+class AMDGPUInliningAnchor : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification
+
+ AMDGPUInliningAnchor() : MachineFunctionPass(ID) {}
+
+ // We don't really need to process any functions here.
+ bool runOnMachineFunction(MachineFunction &MF) override { return false; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ StringRef getPassName() const override;
+
+ /// Prepare the pass manager stack for the inliner. This will push an
+ /// `AMDGPUInliningPassManager` onto the stack.
+ void preparePassManager(PMStack &Stack) override;
+};
+
+} // end anonymous namespace.
+
+// Pass identification
+char AMDGPUMachineLevelInliner::ID = 0;
+char AMDGPUInliningPassManager::ID = 0;
+char AMDGPUInliningAnchor::ID = 0;
+
+char &llvm::AMDGPUMachineLevelInlinerID = AMDGPUMachineLevelInliner::ID;
+char &llvm::AMDGPUInliningAnchorID = AMDGPUInliningAnchor::ID;
+
+INITIALIZE_PASS_BEGIN(AMDGPUMachineLevelInliner, DEBUG_TYPE,
+ "AMDGPU Machine Level Inliner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AMDGPUInliningAnchor)
+INITIALIZE_PASS_END(AMDGPUMachineLevelInliner, DEBUG_TYPE,
+"AMDGPU Machine Level Inliner", false, false)
+
+INITIALIZE_PASS_BEGIN(AMDGPUInliningAnchor, "amdgpu-inlining-anchor",
+ "AMDGPU Inlining Anchor", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(AMDGPUInliningAnchor, "amdgpu-inlining-anchor",
+"AMDGPU Inlining Anchor", false, true)
+
+AMDGPUMachineLevelInliner::AMDGPUMachineLevelInliner()
+: MachineFunctionPass(ID) {
+ initializeAMDGPUMachineLevelInlinerPass(*PassRegistry::getPassRegistry());
+}
+
+void AMDGPUMachineLevelInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineModuleInfoWrapperPass>();
+  AU.addRequired<AMDGPUInliningAnchor>();
+  AU.addPreserved<MachineModuleInfoWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool AMDGPUMachineLevelInliner::runOnMachineFunction(MachineFunction &MF) {
+  MachineModuleInfo &MMI =
+      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+ Function &F = MF.getFunction();
+ if (shouldInlineCallsTo(F)) {
cmc-rep wrote:
Suggestion: perhaps we should say "mayInlineCallsTo(F)"? I am thinking about
broader use of the InlinePassManager.
https://github.com/llvm/llvm-project/pull/169476
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)
+  if (shouldInlineCallsTo(F)) {
+    // Mark the function as machine-inlined in AMDGPUMachineModuleInfo. This
+    // tells the inlining pass manager to stop processing it.
+    auto &AMMMI = MMI.getObjFileInfo<AMDGPUMachineModuleInfo>();
+    AMMMI.addMachineInlinedFunction(F);
cmc-rep wrote:
Maybe the name could be "addMachineInliningCandidate(F)"? Again, I am thinking
about potentially broader use of the InlinePassManager.
https://github.com/llvm/llvm-project/pull/169476
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)
https://github.com/cmc-rep edited https://github.com/llvm/llvm-project/pull/169476 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)
aengelke wrote: We don't provide any API/ABI stability across LLVM versions, so APIVersion seems rather useless in practice anyway. If there's an APIVersion mismatch, the plugin is built against an incompatible LLVM version anyway... Fixing this properly would require to stop returning the info struct by value, which would require changes for all users, which I wanted to avoid. https://github.com/llvm/llvm-project/pull/170846 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)
@@ -1017,16 +1018,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
}
#endif
}
-  // Attempt to load pass plugins and register their callbacks with PB.
-  for (auto &PluginFN : CodeGenOpts.PassPlugins) {
-    auto PassPlugin = PassPlugin::Load(PluginFN);
-    if (PassPlugin) {
-      PassPlugin->registerPassBuilderCallbacks(PB);
-    } else {
-      Diags.Report(diag::err_fe_unable_to_load_plugin)
-          << PluginFN << toString(PassPlugin.takeError());
-    }
-  }
+  // Register plugin callbacks with PB.
+  for (auto &Plugin : Plugins)
+    Plugin.registerPassBuilderCallbacks(PB);
aengelke wrote:
*Need* no, they continue to work just fine.
- LTO: yes, might be good to have at some point in the future.
- Flang: likewise.
- clang-linker-wrapper: if there's interest, this can be added. While it
currently seems to load plugins, it doesn't seem to call any methods on them.
- MLIR ModuleToObject::translateToISA also only seems to be used for GPUs and
MLIR doesn't do anything related to plugins right now.
opt doesn't call back-ends, and lld doesn't call them directly.
https://github.com/llvm/llvm-project/pull/170846
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ADT] Make use of subsetOf and anyCommon methods of BitVector (NFC) (PR #170876)
https://github.com/atrosinenko updated
https://github.com/llvm/llvm-project/pull/170876
>From 52dbe766a39af89c0cc198730b3933e679733bed Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko
Date: Fri, 5 Dec 2025 16:08:28 +0300
Subject: [PATCH] [ADT] Make use of subsetOf and anyCommon methods of BitVector
(NFC)
Replace the code along these lines
BitVector Tmp = LHS;
Tmp &= RHS;
return Tmp.any();
and
BitVector Tmp = LHS;
Tmp.reset(RHS);
return Tmp.none();
with `LHS.anyCommon(RHS)` and `LHS.subsetOf(RHS)`, respectively, which
do not require creating a temporary BitVector and can return early.
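As a minimal sketch of the two patterns the commit message describes (illustrative only; it assumes just the `llvm::BitVector` methods named above):
```
#include "llvm/ADT/BitVector.h"
using llvm::BitVector;

// Before: materialize the intersection, then test it.
bool overlapsOld(const BitVector &LHS, const BitVector &RHS) {
  BitVector Tmp = LHS;  // copies the whole bit vector
  Tmp &= RHS;
  return Tmp.any();
}

// After: no temporary, and anyCommon() can stop at the first common word.
bool overlapsNew(const BitVector &LHS, const BitVector &RHS) {
  return LHS.anyCommon(RHS);
}
```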
---
bolt/include/bolt/Passes/LivenessAnalysis.h | 5 ++---
bolt/include/bolt/Passes/ReachingDefOrUse.h | 3 +--
bolt/lib/Passes/RegReAssign.cpp | 8 ++--
bolt/lib/Passes/ShrinkWrapping.cpp| 10 --
bolt/lib/Passes/StackAvailableExpressions.cpp | 3 +--
bolt/lib/Passes/TailDuplication.cpp | 8
llvm/lib/CodeGen/RDFRegisters.cpp | 6 ++
llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp | 6 ++
8 files changed, 18 insertions(+), 31 deletions(-)
diff --git a/bolt/include/bolt/Passes/LivenessAnalysis.h b/bolt/include/bolt/Passes/LivenessAnalysis.h
index 1df1113318d0b..f4faa1dc34ecd 100644
--- a/bolt/include/bolt/Passes/LivenessAnalysis.h
+++ b/bolt/include/bolt/Passes/LivenessAnalysis.h
@@ -37,10 +37,9 @@ class LivenessAnalysis : public DataflowAnalysis<
-    BitVector BV = (*this->getStateAt(PP));
+    const BitVector &BV = *this->getStateAt(PP);
     const BitVector &RegAliases = BC.MIB->getAliases(Reg);
-    BV &= RegAliases;
-    return BV.any();
+    return BV.anyCommon(RegAliases);
   }
   void run() { Parent::run(); }
diff --git a/bolt/include/bolt/Passes/ReachingDefOrUse.h b/bolt/include/bolt/Passes/ReachingDefOrUse.h
index 585d673e3b84e..41a6091aad4cb 100644
--- a/bolt/include/bolt/Passes/ReachingDefOrUse.h
+++ b/bolt/include/bolt/Passes/ReachingDefOrUse.h
@@ -133,8 +133,7 @@ class ReachingDefOrUse
         RA.getInstClobberList(Point, Regs);
       else
         RA.getInstUsedRegsList(Point, Regs, false);
-      Regs &= this->BC.MIB->getAliases(*TrackingReg);
-      if (Regs.any())
+      if (Regs.anyCommon(this->BC.MIB->getAliases(*TrackingReg)))
         Next.set(this->ExprToIdx[&Point]);
     }
   }
diff --git a/bolt/lib/Passes/RegReAssign.cpp b/bolt/lib/Passes/RegReAssign.cpp
index 0859cd244ce40..54eff51bfee68 100644
--- a/bolt/lib/Passes/RegReAssign.cpp
+++ b/bolt/lib/Passes/RegReAssign.cpp
@@ -316,18 +316,14 @@ void RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) {
       break;
     }
-    BitVector AnyAliasAlive = AliveAtStart;
-    AnyAliasAlive &= BC.MIB->getAliases(ClassicReg);
-    if (AnyAliasAlive.any()) {
+    if (AliveAtStart.anyCommon(BC.MIB->getAliases(ClassicReg))) {
       LLVM_DEBUG(dbgs() << "  Bailed on " << BC.MRI->getName(ClassicReg)
                         << " with " << BC.MRI->getName(ExtReg)
                         << " because classic reg is alive\n");
       --End;
       continue;
     }
-    AnyAliasAlive = AliveAtStart;
-    AnyAliasAlive &= BC.MIB->getAliases(ExtReg);
-    if (AnyAliasAlive.any()) {
+    if (AliveAtStart.anyCommon(BC.MIB->getAliases(ExtReg))) {
       LLVM_DEBUG(dbgs() << "  Bailed on " << BC.MRI->getName(ClassicReg)
                         << " with " << BC.MRI->getName(ExtReg)
                         << " because extended reg is alive\n");
diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp
index fe342ccd38a67..b882e2512866d 100644
--- a/bolt/lib/Passes/ShrinkWrapping.cpp
+++ b/bolt/lib/Passes/ShrinkWrapping.cpp
@@ -1100,9 +1100,8 @@ SmallVector ShrinkWrapping::fixPopsPlacements(
     bool Found = false;
     if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
         SaveOffset) {
-      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
-      BV &= UsesByReg[CSR];
-      if (!BV.any()) {
+      const BitVector &BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
+      if (!BV.anyCommon(UsesByReg[CSR])) {
        Found = true;
        PP = BB;
        continue;
@@ -1110,9 +1109,8 @@ SmallVector ShrinkWrapping::fixPopsPlacements(
     }
     for (MCInst &Inst : llvm::reverse(*BB)) {
       if (SPT.getStateBefore(Inst)->first == SaveOffset) {
-        BitVector BV = *RI.getStateAt(Inst);
-        BV &= UsesByReg[CSR];
-        if (!BV.any()) {
+        const BitVector &BV = *RI.getStateAt(Inst);
+        if (!BV.anyCommon(UsesByReg[CSR])) {
          Found = true;
          PP = &Inst;
          break;
diff --git a/bolt/lib/Passes/StackAvailableExpressions.cpp b/bolt/lib/Passes/StackAvailableExpressions.cpp
index a0d361f273de2..c685cc19badc3 100644
--- a/bolt/lib/Passes/StackAvailableExpressions.cpp
+++ b/bolt/lib/Passes/StackAvailableExpressions.cpp
@@ -103,8 +103,7 @@ bool StackAvailableExpressions::doesXKillsY(const MCInst *X
[llvm-branch-commits] [llvm] [ADT] Make use of subsetOf and anyCommon methods of BitVector (NFC) (PR #170876)
https://github.com/atrosinenko updated
https://github.com/llvm/llvm-project/pull/170876
>From 24957f7595d80072554580edf18802aadc30c303 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko
Date: Fri, 5 Dec 2025 16:08:28 +0300
Subject: [PATCH] [ADT] Make use of subsetOf and anyCommon methods of BitVector
(NFC)
Replace the code along these lines
BitVector Tmp = LHS;
Tmp &= RHS;
return Tmp.any();
and
BitVector Tmp = LHS;
Tmp.reset(RHS);
return Tmp.none();
with `LHS.anyCommon(RHS)` and `LHS.subsetOf(RHS)`, respectively, which
do not require creating a temporary BitVector and can return early.
---
bolt/include/bolt/Passes/LivenessAnalysis.h | 5 ++---
bolt/include/bolt/Passes/ReachingDefOrUse.h | 3 +--
bolt/lib/Passes/RegReAssign.cpp | 8 ++--
bolt/lib/Passes/ShrinkWrapping.cpp| 10 --
bolt/lib/Passes/StackAvailableExpressions.cpp | 3 +--
bolt/lib/Passes/TailDuplication.cpp | 8
llvm/lib/CodeGen/RDFRegisters.cpp | 6 ++
llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp | 6 ++
8 files changed, 18 insertions(+), 31 deletions(-)
diff --git a/bolt/include/bolt/Passes/LivenessAnalysis.h b/bolt/include/bolt/Passes/LivenessAnalysis.h
index 1df1113318d0b..f4faa1dc34ecd 100644
--- a/bolt/include/bolt/Passes/LivenessAnalysis.h
+++ b/bolt/include/bolt/Passes/LivenessAnalysis.h
@@ -37,10 +37,9 @@ class LivenessAnalysis : public DataflowAnalysis<
-    BitVector BV = (*this->getStateAt(PP));
+    const BitVector &BV = *this->getStateAt(PP);
     const BitVector &RegAliases = BC.MIB->getAliases(Reg);
-    BV &= RegAliases;
-    return BV.any();
+    return BV.anyCommon(RegAliases);
   }
   void run() { Parent::run(); }
diff --git a/bolt/include/bolt/Passes/ReachingDefOrUse.h b/bolt/include/bolt/Passes/ReachingDefOrUse.h
index 585d673e3b84e..41a6091aad4cb 100644
--- a/bolt/include/bolt/Passes/ReachingDefOrUse.h
+++ b/bolt/include/bolt/Passes/ReachingDefOrUse.h
@@ -133,8 +133,7 @@ class ReachingDefOrUse
         RA.getInstClobberList(Point, Regs);
       else
         RA.getInstUsedRegsList(Point, Regs, false);
-      Regs &= this->BC.MIB->getAliases(*TrackingReg);
-      if (Regs.any())
+      if (Regs.anyCommon(this->BC.MIB->getAliases(*TrackingReg)))
         Next.set(this->ExprToIdx[&Point]);
     }
   }
diff --git a/bolt/lib/Passes/RegReAssign.cpp b/bolt/lib/Passes/RegReAssign.cpp
index 0859cd244ce40..54eff51bfee68 100644
--- a/bolt/lib/Passes/RegReAssign.cpp
+++ b/bolt/lib/Passes/RegReAssign.cpp
@@ -316,18 +316,14 @@ void RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) {
       break;
     }
-    BitVector AnyAliasAlive = AliveAtStart;
-    AnyAliasAlive &= BC.MIB->getAliases(ClassicReg);
-    if (AnyAliasAlive.any()) {
+    if (AliveAtStart.anyCommon(BC.MIB->getAliases(ClassicReg))) {
       LLVM_DEBUG(dbgs() << "  Bailed on " << BC.MRI->getName(ClassicReg)
                         << " with " << BC.MRI->getName(ExtReg)
                         << " because classic reg is alive\n");
       --End;
       continue;
     }
-    AnyAliasAlive = AliveAtStart;
-    AnyAliasAlive &= BC.MIB->getAliases(ExtReg);
-    if (AnyAliasAlive.any()) {
+    if (AliveAtStart.anyCommon(BC.MIB->getAliases(ExtReg))) {
       LLVM_DEBUG(dbgs() << "  Bailed on " << BC.MRI->getName(ClassicReg)
                         << " with " << BC.MRI->getName(ExtReg)
                         << " because extended reg is alive\n");
diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp
index fe342ccd38a67..b882e2512866d 100644
--- a/bolt/lib/Passes/ShrinkWrapping.cpp
+++ b/bolt/lib/Passes/ShrinkWrapping.cpp
@@ -1100,9 +1100,8 @@ SmallVector ShrinkWrapping::fixPopsPlacements(
     bool Found = false;
     if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
         SaveOffset) {
-      BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
-      BV &= UsesByReg[CSR];
-      if (!BV.any()) {
+      const BitVector &BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
+      if (!BV.anyCommon(UsesByReg[CSR])) {
        Found = true;
        PP = BB;
        continue;
@@ -1110,9 +1109,8 @@ SmallVector ShrinkWrapping::fixPopsPlacements(
     }
     for (MCInst &Inst : llvm::reverse(*BB)) {
       if (SPT.getStateBefore(Inst)->first == SaveOffset) {
-        BitVector BV = *RI.getStateAt(Inst);
-        BV &= UsesByReg[CSR];
-        if (!BV.any()) {
+        const BitVector &BV = *RI.getStateAt(Inst);
+        if (!BV.anyCommon(UsesByReg[CSR])) {
          Found = true;
          PP = &Inst;
          break;
diff --git a/bolt/lib/Passes/StackAvailableExpressions.cpp b/bolt/lib/Passes/StackAvailableExpressions.cpp
index a0d361f273de2..c685cc19badc3 100644
--- a/bolt/lib/Passes/StackAvailableExpressions.cpp
+++ b/bolt/lib/Passes/StackAvailableExpressions.cpp
@@ -103,8 +103,7 @@ bool StackAvailableExpressions::doesXKillsY(const MCInst *X
[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)
https://github.com/weliveindetail edited https://github.com/llvm/llvm-project/pull/170846 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)
weliveindetail wrote:
What about the other places in-tree that load plugins: Flang,
clang-linker-wrapper, libLTO, opt and lld? Do they need adjustment?
https://github.com/llvm/llvm-project/pull/170846
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)
@@ -49,7 +53,14 @@ struct PassPluginLibraryInfo {
/// The callback for registering plugin passes with a \c PassBuilder
/// instance
- void (*RegisterPassBuilderCallbacks)(PassBuilder &);
+ void (*RegisterPassBuilderCallbacks)(PassBuilder &) = nullptr;
+
+  /// Callback called before running the back-end passes on the module. The
+  /// callback can generate code itself by writing the expected output to OS and
+  /// returning true to prevent the default pipeline and further plugin
+  /// callbacks from running.
+  bool (*PreCodeGenCallback)(Module &, TargetMachine &, CodeGenFileType,
weliveindetail wrote:
I like the idea to have pre/post opt/codegen callbacks. If we ever get the new
pass manager in the Codegen pipeline, that would help plugins to handle the
pipeline change. Could we generalize this into something like
`registerPipelinePhaseCallbacks()`?
https://github.com/llvm/llvm-project/pull/170846
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
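For concreteness, a rough sketch of the shape such a generalized hook could take (entirely hypothetical; neither the name `PipelinePhaseCallbacks` nor these signatures exist in the PR or in LLVM today):
```
// Hypothetical generalization of per-phase plugin hooks, invented purely
// for illustration of the suggestion above.
namespace llvm {
class Module;
class TargetMachine;
} // namespace llvm

struct PipelinePhaseCallbacks {
  // Runs before the middle-end optimization pipeline.
  void (*PreOptimization)(llvm::Module &) = nullptr;
  // Runs before back-end code generation; returning true would mean the
  // callback produced the output itself and the default pipeline is skipped.
  bool (*PreCodeGen)(llvm::Module &, llvm::TargetMachine &) = nullptr;
};
```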
[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)
https://github.com/weliveindetail commented: Please note that this is the first-ever change to the pass-plugin API. This isn't a breaking change, but we may need breaking changes if we have to fix things down the line. This change will also affect lld as well as out-of-tree frontends like Rust and Swift. https://github.com/llvm/llvm-project/pull/170846 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)
weliveindetail wrote: Should we handle "version 1" plugins explicitly here? The `PassPluginLibraryInfo` structs that plugins return today have one member fewer. Copying them into the "version 2" `Info` stack slot might pull in uninitialized memory for the newly introduced function pointer. https://github.com/llvm/llvm-project/pull/170846 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
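A minimal sketch of the kind of guard being suggested (hypothetical loader-side code, not from the PR; it assumes the `APIVersion` and `PreCodeGenCallback` members shown in the diff earlier in this thread):
```
#include "llvm/Passes/PassPlugin.h"
using namespace llvm;

// Hypothetical: if the plugin was built against the older ("version 1")
// PassPluginLibraryInfo, its entry point never wrote the new member, so
// clear it rather than calling through a potentially uninitialized pointer.
PassPluginLibraryInfo sanitizePluginInfo(PassPluginLibraryInfo Info) {
  if (Info.APIVersion < 2)
    Info.PreCodeGenCallback = nullptr;
  return Info;
}
```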
[llvm-branch-commits] [clang] [llvm] [RFC][LLVM][Clang] Add LLVM plugin hook for back-ends (PR #170846)
aengelke wrote:
I would consider this to be out-of-scope for now. Back-end pipelines are,
currently, target-specific and any support for plugins modifying these
pipelines is likely to need a lot of further discussion (esp. as many back-end
passes are not really self-contained). There was some discussion on
CodeGenPassBuilder to add callbacks for targets earlier this year, but it seems
work on that has stalled. Additionally, the proposed plugin API already permits
building and running a custom pass manager, so right now there would be no
benefit of speculatively building a more generic API.
If there's a desire to add external hooks for CodeGenPassBuilder, I think this
should be a separate function similar to the RegisterPassBuilderCallbacks that
exists so far.
https://github.com/llvm/llvm-project/pull/170846
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [llvm-rc] Don't interpret integer literals as octal numbers in rc.exe mode (#166915) (PR #167174)
dyung wrote: > Are we merging this in 21.x or should we close? Sorry for the late reply, but it was decided we would not take this change in the 21.x release branch. https://github.com/llvm/llvm-project/pull/167174 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/21.x: [clang-format] Don't swap `(const override)` with QAS_Right (#167191) (PR #170966)
dyung wrote: Any idea how long this bug has existed? https://github.com/llvm/llvm-project/pull/170966 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 75437ec - Revert "[AMDGPU] Enable i8 GEP promotion for vector allocas (#166132)"
Author: Jan Patrick Lehr
Date: 2025-12-08T08:55:16+01:00
New Revision: 75437ec7cf23b705b336d6432d77159f450e62cf
URL:
https://github.com/llvm/llvm-project/commit/75437ec7cf23b705b336d6432d77159f450e62cf
DIFF:
https://github.com/llvm/llvm-project/commit/75437ec7cf23b705b336d6432d77159f450e62cf.diff
LOG: Revert "[AMDGPU] Enable i8 GEP promotion for vector allocas (#166132)"
This reverts commit 6ec8c4351cfc1d0627d1633b02ea787bd29c77d8.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
Removed:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index bab76e87af40c..b79689c39ef84 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -457,25 +457,10 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
const auto &VarOffset = VarOffsets.front();
APInt OffsetQuot;
APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
-  Value *Offset = VarOffset.first;
-  if (Rem != 0) {
-    unsigned ElemSizeShift = Log2_64(VecElemSize);
-    SimplifyQuery SQ(DL);
-    SQ.CxtI = GEP;
-    KnownBits KB = computeKnownBits(VarOffset.first, SQ);
-    // Bail out if the index may point into the middle of an element.
-    if (KB.countMinTrailingZeros() < ElemSizeShift)
-      return nullptr;
-
-    Value *Scaled = Builder.CreateLShr(VarOffset.first, ElemSizeShift);
-    if (Instruction *NewInst = dyn_cast<Instruction>(Scaled))
-      NewInsts.push_back(NewInst);
-
-    Offset = Scaled;
-    OffsetQuot = APInt(BW, 1);
-    Rem = 0;
-  }
+  if (Rem != 0 || OffsetQuot.isZero())
+    return nullptr;
+  Value *Offset = VarOffset.first;
   if (!isa<IntegerType>(Offset->getType()))
     return nullptr;
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
index bcc61062640d2..76e1868b3c4b9 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll
@@ -250,150 +250,6 @@ bb2:
store i32 0, ptr addrspace(5) %extractelement
ret void
}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_0_or_4(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <3 x float> poison
-; CHECK-NEXT:    [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT:    [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 0, i32 4
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
-; CHECK-NEXT:    store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
-; CHECK-NEXT:    ret void
-;
-  %alloca = alloca <3 x float>, align 16, addrspace(5)
-  %vec = load <3 x float>, ptr %buffer
-  store <3 x float> %vec, ptr addrspace(5) %alloca
-  %index = select i1 %idx_sel, i32 0, i32 4
-  %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
-  store float %data, ptr addrspace(5) %elt, align 4
-  %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
-  store <3 x float> %updated, ptr %buffer, align 16
-  ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8_4_or_8(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <3 x float> poison
-; CHECK-NEXT:    [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
-; CHECK-NEXT:    [[INDEX:%.*]] = select i1 [[IDX_SEL]], i32 4, i32 8
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
-; CHECK-NEXT:    store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
-; CHECK-NEXT:    ret void
-;
-  %alloca = alloca <3 x float>, align 16, addrspace(5)
-  %vec = load <3 x float>, ptr %buffer
-  store <3 x float> %vec, ptr addrspace(5) %alloca
-  %index = select i1 %idx_sel, i32 4, i32 8
-  %elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
-  store float %data, ptr addrspace(5) %elt, align 4
-  %updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
-  store <3 x float> %updated, ptr %buffer, align 16
-  ret void
-}
-
-define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(ptr %buffer, float %data, i1 %idx_sel) {
-; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_nested_vector_gep_i8_4_or_8(
-; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i1 [[IDX_SEL:%.*]]) {
-; CHECK-NEXT:    [[ALLOCA:%.*]] = freeze <8 x float> poison
[llvm-branch-commits] [llvm] release/21.x: [WebAssembly] Remove FAKE_USEs before ExplicitLocals (#160768) (PR #171184)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/171184
Backport e5b2a06546eb20662156b8a59b77aca086301486
Requested by: @dschuff
>From 3c97f983e2a9b3fe1c8c9472bc40e05301f30179 Mon Sep 17 00:00:00 2001
From: Heejin Ahn
Date: Thu, 25 Sep 2025 14:49:25 -0700
Subject: [PATCH] [WebAssembly] Remove FAKE_USEs before ExplicitLocals
(#160768)
`FAKE_USE`s are essentially no-ops, so they have to be removed before
running ExplicitLocals so that `drop`s will be correctly inserted to
drop those values used by the `FAKE_USE`s.
---
This is a reapplication of #160228, which broke the Wasm waterfall. This PR
additionally prevents `FAKE_USE`s' uses from being stackified.
Previously, a 'def' whose first use was a `FAKE_USE` was able to be
stackified as `TEE`:
- Before
```
Reg = INST ...            // Def
FAKE_USE ..., Reg, ...    // Insert
INST ..., Reg, ...
INST ..., Reg, ...
```
- After RegStackify
```
DefReg = INST ...          // Def
TeeReg, Reg = TEE ... DefReg
FAKE_USE ..., TeeReg, ...  // Insert
INST ..., Reg, ...
INST ..., Reg, ...
```
And this assumes `DefReg` and `TeeReg` are stackified.
But this PR removes `FAKE_USE`s in the beginning of ExplicitLocals. And
later in ExplicitLocals we have a routine to unstackify registers that
have no uses left:
https://github.com/llvm/llvm-project/blob/7b28fcd2b182ba2c9d2d71c386be92fc0ee3cc9d/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp#L257-L269
(This was added in #149626. Then it didn't seem it would trigger the
same assertions for `TEE`s because it was fixing the bug where a
terminator was removed in CFGSort (#149097).
Details here:
https://github.com/llvm/llvm-project/pull/149432#issuecomment-3091444141)
- After `FAKE_USE` removal and unstackification
```
DefReg = INST ...
TeeReg, Reg = TEE ... DefReg
INST ..., Reg, ...
INST ..., Reg, ...
```
And now `TeeReg` is unstackified. This triggered the assertion here,
that `TeeReg` should be stackified:
https://github.com/llvm/llvm-project/blob/7b28fcd2b182ba2c9d2d71c386be92fc0ee3cc9d/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp#L316
This prevents `FAKE_USE`s' uses from being stackified altogether,
including `TEE` transformation. Even when it is not a `TEE`
transformation and just a single use stackification, it does not trigger
the assertion but there's no point stackifying it given that it will be
deleted.
---
Fixes https://github.com/emscripten-core/emscripten/issues/25301.
(cherry picked from commit e5b2a06546eb20662156b8a59b77aca086301486)
---
.../WebAssembly/WebAssemblyExplicitLocals.cpp | 14 +++
.../WebAssembly/WebAssemblyRegStackify.cpp| 4 +++
llvm/test/CodeGen/WebAssembly/fake-use.ll | 25 +++
3 files changed, 43 insertions(+)
create mode 100644 llvm/test/CodeGen/WebAssembly/fake-use.ll
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index e6486e247209b..5c3127e2d3dc6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -216,6 +216,18 @@ static MachineInstr *findStartOfTree(MachineOperand &MO,
return Def;
}
+// FAKE_USEs are no-ops, so remove them here so that the values used by them
+// will be correctly dropped later.
+static void removeFakeUses(MachineFunction &MF) {
+  SmallVector<MachineInstr *> ToDelete;
+  for (auto &MBB : MF)
+    for (auto &MI : MBB)
+      if (MI.isFakeUse())
+        ToDelete.push_back(&MI);
+  for (auto *MI : ToDelete)
+    MI->eraseFromParent();
+}
+
bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "** Make Locals Explicit **\n"
"** Function: "
@@ -226,6 +238,8 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
   WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ removeFakeUses(MF);
+
// Map non-stackified virtual registers to their local ids.
  DenseMap<unsigned, unsigned> Reg2Local;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index bc91c6424b63e..fd13ef9a1921d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -866,6 +866,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
       if (Insert->isDebugValue())
         continue;
+      // Ignore FAKE_USEs, which are no-ops and will be deleted later.
+      if (Insert->isFakeUse())
+        continue;
+
       // Iterate through the inputs in reverse order, since we'll be pulling
       // operands off the stack in LIFO order.
       CommutingState Commuting;
diff --git a/llvm/test/CodeGen/WebAssembly/fake-use.ll
b/llvm/test/CodeGen/WebAssembly/fake-use.ll
new file mode 100644
index 0..a18ce33566df0
--- /dev/null
+++
[llvm-branch-commits] [llvm] release/21.x: [WebAssembly] Remove FAKE_USEs before ExplicitLocals (#160768) (PR #171184)
llvmbot wrote: @dschuff What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/171184 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF][AArch64] Replace memtag hack with less-confusing code (PR #171182)
@@ -741,7 +741,7 @@ static void addRelativeReloc(Ctx &ctx, InputSectionBase
&isec,
//
https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative
if (sym.isTagged() && !isAArch64Auth &&
(addend < 0 || static_cast(addend) >= sym.getSize()))
-isec.addReloc({expr, type, offsetInSec, addend, &sym});
+isec.addReloc({R_ADDEND_NEG, type, offsetInSec, addend, &sym});
jrtc27 wrote:
But I'm deferring that until
https://github.com/llvm/llvm-project/pull/171180/files#r2599706437 is resolved,
since this gets rather uglier if `!isAArch64Auth` needs to stay
https://github.com/llvm/llvm-project/pull/171182
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF][AArch64] Replace memtag hack with less-confusing code (PR #171182)
@@ -741,7 +741,7 @@ static void addRelativeReloc(Ctx &ctx, InputSectionBase
&isec,
//
https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative
if (sym.isTagged() && !isAArch64Auth &&
(addend < 0 || static_cast(addend) >= sym.getSize()))
-isec.addReloc({expr, type, offsetInSec, addend, &sym});
+isec.addReloc({R_ADDEND_NEG, type, offsetInSec, addend, &sym});
jrtc27 wrote:
I'd like to move this into RelocationBaseSection::addReloc itself, like how is
done for Elf_Rel, as otherwise any other place that calls
RelocationBaseSection::add(Relative)Reloc needs to know about this oddity. In
practice there are none, but the less "weird" all these various ABI extensions
are in terms of fitting into LLD's APIs the better. Downstream in Morello LLD
we've similarly hidden the way R_MORELLO_RELATIVE works (it emits a "fragment"
that describes the capability base/length/permissions, and the dynamic
relocation's addend is the capability offset) so you "just" call
RelocationBaseSection::add(Relative)Reloc and all the magic for how to actually
do that happens for you. Following that for MTE globals would be a good idea
IMO.
https://github.com/llvm/llvm-project/pull/171182
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)
arsenm wrote:
> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite. Learn more: https://graphite.dev/docs/merge-pull-requests
* **#171186** (this PR; view in Graphite)
* **#171185**
* `main`
This stack of pull requests is managed by Graphite. Learn more about stacking: https://stacking.dev/
https://github.com/llvm/llvm-project/pull/171186
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)
https://github.com/arsenm created
https://github.com/llvm/llvm-project/pull/171186
We have special case handling for the logb builtins, so use them.
>From 46c34ddcc57617d4c42839aedd53a96a18853581 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 8 Dec 2025 16:04:44 +0100
Subject: [PATCH] clang/HIP: Avoid using ocml logb
We have special case handling for the logb builtins, so use them.
---
clang/lib/Headers/__clang_hip_math.h| 4 +-
clang/test/Headers/__clang_hip_math.hip | 162 +---
2 files changed, 118 insertions(+), 48 deletions(-)
diff --git a/clang/lib/Headers/__clang_hip_math.h
b/clang/lib/Headers/__clang_hip_math.h
index 759e742c9d012..03c2721b4ad3c 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -498,7 +498,7 @@ __DEVICE__
float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __builtin_log2f)(__x);
}
__DEVICE__
-float logbf(float __x) { return __ocml_logb_f32(__x); }
+float logbf(float __x) { return __builtin_logbf(__x); }
__DEVICE__
float logf(float __x) { return __FAST_OR_SLOW(__logf, __builtin_logf)(__x); }
@@ -901,7 +901,7 @@ __DEVICE__
double log2(double __x) { return __ocml_log2_f64(__x); }
__DEVICE__
-double logb(double __x) { return __ocml_logb_f64(__x); }
+double logb(double __x) { return __builtin_logb(__x); }
__DEVICE__
long int lrint(double __x) { return __builtin_rint(__x); }
diff --git a/clang/test/Headers/__clang_hip_math.hip
b/clang/test/Headers/__clang_hip_math.hip
index 4163666811c91..426e5af319cbf 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -3871,69 +3871,139 @@ extern "C" __device__ double test_log2(double x) {
return log2(x);
}
-// DEFAULT-LABEL: define dso_local noundef float @test_logbf(
-// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-LABEL: define dso_local float @test_logbf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
-// DEFAULT-NEXT:[[CALL_I:%.*]] = tail call contract noundef float
@__ocml_logb_f32(float noundef [[X]]) #[[ATTR13]]
-// DEFAULT-NEXT:ret float [[CALL_I]]
-//
-// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float
@test_logbf(
-// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]])
local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT:[[TMP0:%.*]] = tail call { float, i32 }
@llvm.frexp.f32.i32(float [[X]])
+// DEFAULT-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// DEFAULT-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// DEFAULT-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// DEFAULT-NEXT:[[TMP4:%.*]] = tail call contract float
@llvm.fabs.f32(float [[X]])
+// DEFAULT-NEXT:[[TMP5:%.*]] = fcmp contract one float [[TMP4]],
0x7FF0
+// DEFAULT-NEXT:[[TMP6:%.*]] = select contract i1 [[TMP5]], float
[[TMP3]], float [[TMP4]]
+// DEFAULT-NEXT:[[TMP7:%.*]] = fcmp contract oeq float [[X]], 0.00e+00
+// DEFAULT-NEXT:[[TMP8:%.*]] = select contract i1 [[TMP7]], float
0xFFF0, float [[TMP6]]
+// DEFAULT-NEXT:ret float [[TMP8]]
+//
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_logbf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]])
local_unnamed_addr #[[ATTR3]] {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
-// FINITEONLY-NEXT:[[CALL_I:%.*]] = tail call nnan ninf contract noundef
nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf)
[[X]]) #[[ATTR13]]
-// FINITEONLY-NEXT:ret float [[CALL_I]]
+// FINITEONLY-NEXT:[[TMP0:%.*]] = tail call { float, i32 }
@llvm.frexp.f32.i32(float nofpclass(nan inf) [[X]])
+// FINITEONLY-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// FINITEONLY-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// FINITEONLY-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// FINITEONLY-NEXT:ret float [[TMP3]]
//
-// APPROX-LABEL: define dso_local noundef float @test_logbf(
-// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-LABEL: define dso_local float @test_logbf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
-// APPROX-NEXT:[[CALL_I:%.*]] = tail call contract noundef float
@__ocml_logb_f32(float noundef [[X]]) #[[ATTR13]]
-// APPROX-NEXT:ret float [[CALL_I]]
-//
-// NCRDIV-LABEL: define dso_local noundef float @test_logbf(
-// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT:[[TMP0:%.*]] = tail call { float, i32 }
@llvm.frexp.f32.i32(float [[X]])
+// APPROX-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// APPROX-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// APPROX-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// APPROX-NEXT:[[TMP4:%.*]] = tail call contract float
@llvm.fabs.f32(float [[X]])
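For readers comparing the old and new CHECK lines, here is a minimal C++ sketch (not the header's code; `logb_sketch` is an invented name) of the frexp-based expansion visible in the IR above: logb(x) is the unbiased exponent, i.e. frexp's exponent minus one, with infinities mapping to +inf and zeros mapping to -inf.
```cpp
#include <cmath>

// Mirrors the IR above: frexp gives x = m * 2^e with 0.5 <= |m| < 1,
// so the unbiased exponent logb(x) is e - 1 for finite nonzero x.
static float logb_sketch(float x) {
  int e = 0;
  (void)std::frexp(x, &e);
  float mag = std::fabs(x);
  float r = (!std::isnan(x) && mag != INFINITY)
                ? static_cast<float>(e - 1) // finite: unbiased exponent
                : mag;                      // +/-inf -> +inf, NaN -> NaN
  if (x == 0.0f) // logb(+/-0) is -inf
    r = -INFINITY;
  return r;
}
```
In the FINITEONLY run above the inf/zero selects disappear, because the no-NaNs/no-infs assumption lets the compiler keep only the `e - 1` path.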
[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/171186 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC] [FlowSensitive] Fix missing namespace in MockHeaders (PR #170954)
https://github.com/jvoung approved this pull request. Nice catch! https://github.com/llvm/llvm-project/pull/170954 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [LifetimeSafety] Add implicit tracking for STL functions (PR #170005)
https://github.com/usx95 updated
https://github.com/llvm/llvm-project/pull/170005
>From 5be82f0e9576d5d1c5c74d5935b6c519bd63a9ff Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena
Date: Sat, 29 Nov 2025 15:07:40 +
Subject: [PATCH] Implicit lifetimebound for std namespace
---
.../LifetimeSafety/LifetimeAnnotations.h | 14 ++
.../LifetimeSafety/FactsGenerator.cpp | 6 +-
.../LifetimeSafety/LifetimeAnnotations.cpp| 82
clang/lib/Analysis/LifetimeSafety/Origins.cpp | 4 +
clang/lib/Sema/CheckExprLifetime.cpp | 64 +--
.../unittests/Analysis/LifetimeSafetyTest.cpp | 180 ++
6 files changed, 287 insertions(+), 63 deletions(-)
diff --git
a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h
b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h
index 1a16fb82f9a84..8e26a4d41a957 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/LifetimeAnnotations.h
@@ -38,6 +38,20 @@ bool isAssignmentOperatorLifetimeBound(const CXXMethodDecl *CMD);
/// method or because it's a normal assignment operator.
bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD);
+// Returns true if the implicit object argument (this) of a method call should
+// be tracked for GSL lifetime analysis. This applies to STL methods that return
+// pointers or references that depend on the lifetime of the object, such as
+// container iterators (begin, end), data accessors (c_str, data, get), or
+// element accessors (operator[], operator*, front, back, at).
+bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee);
+
+// Returns true if the first argument of a free function should be tracked for
+// GSL lifetime analysis. This applies to STL free functions that take a pointer
+// to a GSL Owner or Pointer and return a pointer or reference that depends on
+// the lifetime of the argument, such as std::begin, std::data, std::get, or
+// std::any_cast.
+bool shouldTrackFirstArgument(const FunctionDecl *FD);
+
// Tells whether the type is annotated with [[gsl::Pointer]].
bool isGslPointerType(QualType QT);
// Tells whether the type is annotated with [[gsl::Owner]].
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index c897a5fcd718b..eb219e2f7f334 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -15,6 +15,7 @@
#include "clang/Analysis/Analyses/LifetimeSafety/Origins.h"
#include "clang/Analysis/Analyses/PostOrderCFGView.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TimeProfiler.h"
@@ -410,11 +411,14 @@ void FactsGenerator::handleFunctionCall(const Expr *Call,
         Method && Method->isInstance()) {
       if (I == 0)
         // For the 'this' argument, the attribute is on the method itself.
-        return implicitObjectParamIsLifetimeBound(Method);
+        return implicitObjectParamIsLifetimeBound(Method) ||
+               shouldTrackImplicitObjectArg(Method);
       if ((I - 1) < Method->getNumParams())
         // For explicit arguments, find the corresponding parameter
         // declaration.
         PVD = Method->getParamDecl(I - 1);
+    } else if (I == 0 && shouldTrackFirstArgument(FD)) {
+      return true;
     } else if (I < FD->getNumParams()) {
       // For free functions or static methods.
       PVD = FD->getParamDecl(I);
diff --git a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp
b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp
index 54e343fc2ee5e..860aa5373a32c 100644
--- a/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/LifetimeAnnotations.cpp
@@ -71,6 +71,88 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl *FD) {
return isNormalAssignmentOperator(FD);
}
+// Decl::isInStdNamespace will return false for iterators in some STL
+// implementations due to them being defined in a namespace outside of the std
+// namespace.
+static bool isInStlNamespace(const Decl *D) {
+  const DeclContext *DC = D->getDeclContext();
+  if (!DC)
+    return false;
+  if (const auto *ND = dyn_cast<NamespaceDecl>(DC))
+    if (const IdentifierInfo *II = ND->getIdentifier()) {
+      StringRef Name = II->getName();
+      if (Name.size() >= 2 && Name.front() == '_' &&
+          (Name[1] == '_' || isUppercase(Name[1])))
+        return true;
+    }
+
+  return DC->isStdNamespace();
+}
+
+static bool isPointerLikeType(QualType QT) {
+  return isGslPointerType(QT) || QT->isPointerType() || QT->isNullPtrType();
+}
+
+bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
+  if (auto *Conv = dyn_cast_or_null<CXXConversionDecl>(Callee))
+    if (isGslPointerType(Conv->getConversionType()) &&
+        Callee->getParent()->hasAttr(
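To make the intent concrete, here is a hedged example of the pattern this implicit tracking targets (assuming `std::string` is recognized as a GSL Owner, as in the annotated STL; the function name is invented):
```cpp
#include <string>

const char *first_line_start(const std::string &s) {
  // c_str() on a temporary Owner returns a pointer tied to the
  // temporary's lifetime; tracking the implicit object argument lets
  // the analysis connect the returned pointer to that temporary.
  const char *p = std::string(s).c_str(); // dangles after this statement
  return p;
}
```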
[llvm-branch-commits] [clang] [LifetimeSafety] Track moved declarations to prevent false positives (PR #170007)
https://github.com/usx95 updated
https://github.com/llvm/llvm-project/pull/170007
>From 444baf2e0a43002cdcc8f516cd94deab30c3be57 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena
Date: Sat, 29 Nov 2025 16:43:06 +
Subject: [PATCH] std_move false positive
---
.../Analyses/LifetimeSafety/FactsGenerator.h | 5
.../LifetimeSafety/FactsGenerator.cpp | 23 +++
clang/test/Sema/warn-lifetime-safety.cpp | 18 +++
3 files changed, 46 insertions(+)
diff --git
a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
index ffe9101606a97..ac97991342b86 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
@@ -102,6 +102,11 @@ class FactsGenerator : public ConstStmtVisitor<FactsGenerator> {
// corresponding to the left-hand side is updated to be a "write", thereby
// exempting it from the check.
llvm::DenseMap UseFacts;
+
+  // Tracks declarations that have been moved via std::move. This is used to
+  // prevent false positives when the original owner is destroyed after the
+  // value has been moved. This tracking is flow-insensitive.
+  llvm::DenseSet<const ValueDecl *> MovedDecls;
};
} // namespace clang::lifetimes::internal
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index 3ff817de0d18a..bf617261cc23b 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -183,9 +183,27 @@ void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) {
}
}
+static bool isStdMove(const FunctionDecl *FD) {
+  return FD && FD->isInStdNamespace() && FD->getIdentifier() &&
+         FD->getName() == "move";
+}
+
+
void FactsGenerator::VisitCallExpr(const CallExpr *CE) {
handleFunctionCall(CE, CE->getDirectCallee(),
{CE->getArgs(), CE->getNumArgs()});
+ // Track declarations that are moved via std::move.
+ // This is a flow-insensitive approximation: once a declaration is moved
+ // anywhere in the function, it's treated as moved everywhere. This can lead
+ // to false negatives on control flow paths where the value is not actually
+ // moved, but these are considered lower priority than the false positives
+ // this tracking prevents.
+ // TODO: The ideal solution would be flow-sensitive ownership tracking that
+ // records where values are moved from and to, but this is more complex.
+  if (isStdMove(CE->getDirectCallee()))
+    if (CE->getNumArgs() == 1)
+      if (auto *DRE =
+              dyn_cast<DeclRefExpr>(CE->getArg(0)->IgnoreParenImpCasts()))
+        MovedDecls.insert(DRE->getDecl());
}
void FactsGenerator::VisitCXXNullPtrLiteralExpr(
@@ -364,6 +382,11 @@ void FactsGenerator::handleLifetimeEnds(const CFGLifetimeEnds &LifetimeEnds) {
// Iterate through all loans to see if any expire.
for (const auto *Loan : FactMgr.getLoanMgr().getLoans()) {
if (const auto *BL = dyn_cast(Loan)) {
+      // Skip loans for declarations that have been moved. When a value is
+      // moved, the original owner no longer has ownership and its destruction
+      // should not cause the loan to expire, preventing false positives.
+      if (MovedDecls.contains(BL->getAccessPath().D))
+        continue;
// Check if the loan is for a stack variable and if that variable
// is the one being destructed.
if (BL->getAccessPath().D == LifetimeEndsVD)
diff --git a/clang/test/Sema/warn-lifetime-safety.cpp
b/clang/test/Sema/warn-lifetime-safety.cpp
index f22c73cfeb784..97a79cc4ce102 100644
--- a/clang/test/Sema/warn-lifetime-safety.cpp
+++ b/clang/test/Sema/warn-lifetime-safety.cpp
@@ -1,9 +1,14 @@
 // RUN: %clang_cc1 -fsyntax-only -fexperimental-lifetime-safety -Wexperimental-lifetime-safety -Wno-dangling -verify %s
+#include "Inputs/lifetime-analysis.h"
+
struct View;
struct [[gsl::Owner]] MyObj {
int id;
+ MyObj();
+ MyObj(int);
+ MyObj(const MyObj&);
~MyObj() {} // Non-trivial destructor
MyObj operator+(MyObj);
@@ -1297,3 +1302,16 @@ void add(int c, MyObj* node) {
arr[4] = node;
}
} // namespace CppCoverage
+
+namespace do_not_warn_on_std_move {
+void silenced() {
+  MyObj b;
+  View v;
+  {
+    MyObj a;
+    v = a;
+    b = std::move(a); // No warning for 'a' being moved.
+  }
+  (void)v;
+}
+} // namespace do_not_warn_on_std_move
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
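As a sketch of the flow-insensitivity caveat described in the patch comment above (reusing the `MyObj`/`View` helpers from the test; `cond` and the function name are invented): once `std::move(a)` appears anywhere in the function, the loan on `a` is suppressed on every path, including paths where the move never happens.
```cpp
void possible_false_negative(bool cond) {
  MyObj b;
  View v;
  {
    MyObj a;
    v = a;              // v borrows from 'a'
    if (cond)
      b = std::move(a); // move on one path only
  }                     // 'a' destroyed here
  // On the !cond path 'v' dangles, but the flow-insensitive set treats
  // 'a' as moved everywhere, so no diagnostic is emitted.
  (void)v;
}
```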
[llvm-branch-commits] [clang] [LifetimeSafety] Add origin tracking for pointer dereference (PR #170006)
https://github.com/usx95 updated
https://github.com/llvm/llvm-project/pull/170006
>From 9308d55447964a7e52d252a7273bbed1a4fd6832 Mon Sep 17 00:00:00 2001
From: Utkarsh Saxena
Date: Sat, 29 Nov 2025 15:49:00 +
Subject: [PATCH] dereference_operator
---
.../LifetimeSafety/FactsGenerator.cpp | 4
.../Sema/warn-lifetime-safety-dataflow.cpp| 6 +
.../Sema/warn-lifetime-safety-suggestions.cpp | 9 +---
clang/test/Sema/warn-lifetime-safety.cpp | 22 +--
4 files changed, 27 insertions(+), 14 deletions(-)
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index eb219e2f7f334..3ff817de0d18a 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -239,6 +239,10 @@ void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) {
     // origin of this UnaryOperator expression.
     killAndFlowOrigin(*UO, *SubExpr);
   }
+  if (UO->getOpcode() == UO_Deref) {
+    const Expr *SubExpr = UO->getSubExpr();
+    killAndFlowOrigin(*UO, *SubExpr);
+  }
 }
void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) {
diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
index 6d5711deba1cf..6fc7c776f935c 100644
--- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
+++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
@@ -152,6 +152,12 @@ void pointer_indirection() {
// CHECK-NEXT: Dest: {{[0-9]+}} (Expr: ImplicitCastExpr, Type : int *)
// CHECK-NEXT: Src: [[O_PP_INNER]] (Decl: pp, Type : int *)
// CHECK: OriginFlow:
+// CHECK-NEXT: Dest: {{[0-9]+}} (Expr: UnaryOperator, Type : int *&)
+// CHECK-NEXT: Src: {{[0-9]+}} (Expr: ImplicitCastExpr, Type : int **)
+// CHECK: OriginFlow:
+// CHECK-NEXT: Dest: {{[0-9]+}} (Expr: UnaryOperator, Type : int *)
+// CHECK-NEXT: Src: {{[0-9]+}} (Expr: ImplicitCastExpr, Type : int *)
+// CHECK: OriginFlow:
// CHECK-NEXT: Dest: {{[0-9]+}} (Expr: ImplicitCastExpr, Type : int *)
// CHECK-NEXT: Src: {{[0-9]+}} (Expr: UnaryOperator, Type : int *)
// CHECK: OriginFlow:
diff --git a/clang/test/Sema/warn-lifetime-safety-suggestions.cpp
b/clang/test/Sema/warn-lifetime-safety-suggestions.cpp
index 280f774b4664c..abc92dd86cbde 100644
--- a/clang/test/Sema/warn-lifetime-safety-suggestions.cpp
+++ b/clang/test/Sema/warn-lifetime-safety-suggestions.cpp
@@ -49,9 +49,12 @@ MyObj* return_ptr_to_ref(MyObj& a) { // expected-warning {{param should be marke
return &a; // expected-note {{param returned here}}
}
-// FIXME: Dereference does not propagate loans.
-MyObj& return_ref_to_ptr(MyObj* a) {
- return *a;
+MyObj& return_ref_to_ptr(MyObj* a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}
+  return *a; // expected-note {{param returned here}}
+}
+
+View return_ref_to_ptr_multiple(MyObj* a) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}
+  return *(&(*(&(*a)))); // expected-note {{param returned here}}
 }
 View return_view_from_reference(MyObj& p) { // expected-warning {{param should be marked [[clang::lifetimebound]]}}
diff --git a/clang/test/Sema/warn-lifetime-safety.cpp
b/clang/test/Sema/warn-lifetime-safety.cpp
index e62c3b69b040b..f22c73cfeb784 100644
--- a/clang/test/Sema/warn-lifetime-safety.cpp
+++ b/clang/test/Sema/warn-lifetime-safety.cpp
@@ -596,10 +596,10 @@ const int* return_pointer_to_parameter_via_reference(int a, int b, bool cond) {
const int* d = &c;
return d; // expected-note 2 {{returned here}}
}
-// FIXME: Dereference of a pointer does not track the reference.
+
 const int& return_pointer_to_parameter_via_reference_1(int a) {
-  const int* d = &a;
-  return *d;
+  const int* d = &a; // expected-warning {{address of stack memory is returned later}}
+  return *d; // expected-note {{returned here}}
 }
const int& get_ref_to_local() {
@@ -1118,24 +1118,24 @@ struct MyObjStorage {
const MyObj *end() const { return objs + 1; }
};
-// FIXME: Detect use-after-scope. Dereference pointer does not propagate the origins.
 void range_based_for_use_after_scope() {
   View v;
   {
     MyObjStorage s;
-    for (const MyObj &o : s) {
+    for (const MyObj &o : s) { // expected-warning {{object whose reference is captured does not live long enough}}
       v = o;
     }
-  }
-  v.use();
+  } // expected-note {{destroyed here}}
+  v.use(); // expected-note {{later used here}}
 }
-// FIXME: Detect use-after-return. Dereference pointer does not propagate the origins.
+
 View range_based_for_use_after_return() {
   MyObjStorage s;
-  for (const MyObj &o : s) {
-    return o;
+  for (const MyObj &o : s) { // expected-warning {{address of stack memory is returned later}}
+    return o; // expected-note {{returned here}}
   }
-  return *s.begin();
+  return *
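A minimal sketch (function and type names invented) of why the new `UO_Deref` case matters together with the existing address-of handling: in a round trip like `*&x`, the origin must flow through both unary operators for the loan on `x` to reach the result.
```cpp
struct Obj { int id; };

const Obj &round_trip(const Obj &x) {
  // &x flows x's origin into the AddrOf expression; the new deref case
  // flows it back out, so the returned reference stays linked to 'x'.
  return *&x;
}
```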
[llvm-branch-commits] [mlir] [mlir][py] partially use mlir_type_subclass for IRTypes.cpp (PR #171143)
https://github.com/ftynse created
https://github.com/llvm/llvm-project/pull/171143
Port the bindings for non-shaped builtin types in IRTypes.cpp to use the
`mlir_type_subclass` mechanism used by non-builtin types. This is part of a
longer-term cleanup to only support one subclassing mechanism. Eventually, the
`PyConcreteType` mechanism will be removed.
This required some surgery in the type casters and the `mlir_type_subclass` logic
to avoid circular imports of the `_mlir.ir` module that would otherwise occur
when using `mlir_type_subclass` to define classes in the `_mlir.ir` module.
Tests are updated to use the `.get_static_typeid()` function instead of the
`.static_typeid` property that was specific to builtin types due to the
`PyConcreteType` mechanism. The change should be NFC otherwise.
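For illustration, a hedged C++ sketch of registering one builtin type through the updated helper (names taken from the MLIR C API; the exact call sites in this patch may differ). The point of the new parameter is that code defining classes inside `_mlir.ir` can hand the module under construction to `mlir_type_subclass` instead of letting it re-import `_mlir.ir`:
```cpp
#include "mlir-c/BuiltinTypes.h"
#include "mlir/Bindings/Python/NanobindAdaptors.h"

using namespace mlir::python::nanobind_adaptors;

// Sketch: called while the _mlir.ir module object 'm' is still being
// built, so the helper must not import _mlir.ir again.
static void defineIndexType(nanobind::module_ &m) {
  mlir_type_subclass(m, "IndexType", mlirTypeIsAIndex,
                     mlirIndexTypeGetTypeID, &m)
      .def_classmethod("get",
                       [](const nanobind::object &cls, MlirContext ctx) {
                         return cls(mlirIndexTypeGet(ctx));
                       });
}
```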
>From deac26450350ba40b9f9357f68ec3a5e458b43d6 Mon Sep 17 00:00:00 2001
From: Alex Zinenko
Date: Mon, 8 Dec 2025 15:50:41 +0100
Subject: [PATCH] [mlir][py] partially use mlir_type_subclass for IRTypes.cpp
Port the bindings for non-shaped builtin types in IRTypes.cpp to use the
`mlir_type_subclass` mechanism used by non-builtin types. This is part of a
longer-term cleanup to only support one subclassing mechanism. Eventually, the
`PyConcreteType` mechanism will be removed.
This required some surgery in the type casters and the `mlir_type_subclass` logic
to avoid circular imports of the `_mlir.ir` module that would otherwise occur
when using `mlir_type_subclass` to define classes in the `_mlir.ir` module.
Tests are updated to use the `.get_static_typeid()` function instead of the
`.static_typeid` property that was specific to builtin types due to the
`PyConcreteType` mechanism. The change should be NFC otherwise.
---
.../mlir/Bindings/Python/NanobindAdaptors.h | 41 +-
mlir/lib/Bindings/Python/IRTypes.cpp | 1029 ++---
mlir/lib/Bindings/Python/MainModule.cpp | 15 +
mlir/test/python/dialects/arith_dialect.py|8 +-
mlir/test/python/ir/builtin_types.py | 11 +-
mlir/test/python/ir/value.py |6 +-
6 files changed, 425 insertions(+), 685 deletions(-)
diff --git a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
index 6594670abaaa7..f678f57527e97 100644
--- a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
+++ b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
@@ -371,16 +371,22 @@ struct type_caster<MlirTypeID> {
}
return false;
}
- static handle from_cpp(MlirTypeID v, rv_policy,
- cleanup_list *cleanup) noexcept {
+
+ static handle
+ from_cpp_given_module(MlirTypeID v,
+const nanobind::module_ &module) noexcept {
if (v.ptr == nullptr)
return nanobind::none();
nanobind::object capsule =
nanobind::steal(mlirPythonTypeIDToCapsule(v));
-return mlir::python::irModule()
-.attr("TypeID")
+return module.attr("TypeID")
.attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
.release();
+ }
+
+ static handle from_cpp(MlirTypeID v, rv_policy,
+ cleanup_list *cleanup) noexcept {
+return from_cpp_given_module(v, mlir::python::irModule());
};
};
@@ -602,9 +608,12 @@ class mlir_type_subclass : public pure_subclass {
/// Subclasses by looking up the super-class dynamically.
mlir_type_subclass(nanobind::handle scope, const char *typeClassName,
IsAFunctionTy isaFunction,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
- : mlir_type_subclass(scope, typeClassName, isaFunction,
- irModule().attr("Type"), getTypeIDFunction) {}
+ GetTypeIDFunctionTy getTypeIDFunction = nullptr,
+ const nanobind::module_ *mlirIrModule = nullptr)
+ : mlir_type_subclass(
+scope, typeClassName, isaFunction,
+(mlirIrModule != nullptr ? *mlirIrModule :
irModule()).attr("Type"),
+getTypeIDFunction, mlirIrModule) {}
/// Subclasses with a provided mlir.ir.Type super-class. This must
/// be used if the subclass is being defined in the same extension module
@@ -613,7 +622,8 @@ class mlir_type_subclass : public pure_subclass {
mlir_type_subclass(nanobind::handle scope, const char *typeClassName,
IsAFunctionTy isaFunction,
const nanobind::object &superCls,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
+ GetTypeIDFunctionTy getTypeIDFunction = nullptr,
+ const nanobind::module_ *mlirIrModule = nullptr)
: pure_subclass(scope, typeClassName, superCls) {
// Casting constructor. Note that it is hard, if not impossible, to
properly
// call chain to parent `__init__` in nanobind due to its special handling
@@ -672,9 +682,18 @@ class mlir_type_subclass : public pure_subclass {
nanobind::sig("def get_s
[llvm-branch-commits] [mlir] [mlir][py] partially use mlir_type_subclass for IRTypes.cpp (PR #171143)
github-actions[bot] wrote:
:warning: Python code formatter, darker found issues in your code. :warning:
You can test this locally with the following command:
```bash
darker --check --diff -r origin/main...HEAD
mlir/test/python/dialects/arith_dialect.py mlir/test/python/ir/builtin_types.py
mlir/test/python/ir/value.py
```
:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:
View the diff from darker here.
```diff
--- ir/builtin_types.py 2025-12-08 14:50:41.00 +
+++ ir/builtin_types.py 2025-12-08 14:58:38.795311 +
@@ -713,11 +713,16 @@
# Test getTypeIdFunction agrees with
# mlirTypeGetTypeID(self) for an instance.
# CHECK: all equal
for t1, t2 in types:
# TODO: remove the alternative once mlir_type_subclass transition
is complete.
-tid1, tid2 = t1.static_typeid if hasattr(t1, "static_typeid") else
t1.get_static_typeid(), Type(t2).typeid
+tid1, tid2 = (
+t1.static_typeid
+if hasattr(t1, "static_typeid")
+else t1.get_static_typeid(),
+Type(t2).typeid,
+)
assert tid1 == tid2 and hash(tid1) == hash(
tid2
), f"expected hash and value equality {t1} {t2}"
else:
print("all equal")
@@ -728,11 +733,15 @@
assert len(typeid_dict)
# CHECK: all equal
for t1, t2 in typeid_dict.items():
# TODO: remove the alternative once mlir_type_subclass transition
is complete.
-tid1 = t1.static_typeid if hasattr(t1, "static_typeid") else
t1.get_static_typeid()
+tid1 = (
+t1.static_typeid
+if hasattr(t1, "static_typeid")
+else t1.get_static_typeid()
+)
assert tid1 == t2.typeid and hash(tid1) == hash(
t2.typeid
), f"expected hash and value equality {t1} {t2}"
else:
print("all equal")
```
https://github.com/llvm/llvm-project/pull/171143
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][py] partially use mlir_type_subclass for IRTypes.cpp (PR #171143)
llvmbot wrote:
@llvm/pr-subscribers-mlir
Author: Oleksandr "Alex" Zinenko (ftynse)
Changes
Port the bindings for non-shaped builtin types in IRTypes.cpp to use the
`mlir_type_subclass` mechanism used by non-builtin types. This is part of a
longer-term cleanup to only support one subclassing mechanism. Eventually, the
`PyConcreteType` mechanism will be removed.
This required some surgery in the type casters and the `mlir_type_subclass` logic
to avoid circular imports of the `_mlir.ir` module that would otherwise occur
when using `mlir_type_subclass` to define classes in the `_mlir.ir` module.
Tests are updated to use the `.get_static_typeid()` function instead of the
`.static_typeid` property that was specific to builtin types due to the
`PyConcreteType` mechanism. The change should be NFC otherwise.
---
Patch is 49.00 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/171143.diff
6 Files Affected:
- (modified) mlir/include/mlir/Bindings/Python/NanobindAdaptors.h (+30-11)
- (modified) mlir/lib/Bindings/Python/IRTypes.cpp (+366-663)
- (modified) mlir/lib/Bindings/Python/MainModule.cpp (+15)
- (modified) mlir/test/python/dialects/arith_dialect.py (+4-4)
- (modified) mlir/test/python/ir/builtin_types.py (+7-4)
- (modified) mlir/test/python/ir/value.py (+3-3)
```diff
diff --git a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
index 6594670abaaa7..f678f57527e97 100644
--- a/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
+++ b/mlir/include/mlir/Bindings/Python/NanobindAdaptors.h
@@ -371,16 +371,22 @@ struct type_caster<MlirTypeID> {
}
return false;
}
- static handle from_cpp(MlirTypeID v, rv_policy,
- cleanup_list *cleanup) noexcept {
+
+ static handle
+ from_cpp_given_module(MlirTypeID v,
+const nanobind::module_ &module) noexcept {
if (v.ptr == nullptr)
return nanobind::none();
nanobind::object capsule =
nanobind::steal(mlirPythonTypeIDToCapsule(v));
-return mlir::python::irModule()
-.attr("TypeID")
+return module.attr("TypeID")
.attr(MLIR_PYTHON_CAPI_FACTORY_ATTR)(capsule)
.release();
+ }
+
+ static handle from_cpp(MlirTypeID v, rv_policy,
+ cleanup_list *cleanup) noexcept {
+return from_cpp_given_module(v, mlir::python::irModule());
};
};
@@ -602,9 +608,12 @@ class mlir_type_subclass : public pure_subclass {
/// Subclasses by looking up the super-class dynamically.
mlir_type_subclass(nanobind::handle scope, const char *typeClassName,
IsAFunctionTy isaFunction,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
- : mlir_type_subclass(scope, typeClassName, isaFunction,
- irModule().attr("Type"), getTypeIDFunction) {}
+ GetTypeIDFunctionTy getTypeIDFunction = nullptr,
+ const nanobind::module_ *mlirIrModule = nullptr)
+ : mlir_type_subclass(
+scope, typeClassName, isaFunction,
+(mlirIrModule != nullptr ? *mlirIrModule :
irModule()).attr("Type"),
+getTypeIDFunction, mlirIrModule) {}
/// Subclasses with a provided mlir.ir.Type super-class. This must
/// be used if the subclass is being defined in the same extension module
@@ -613,7 +622,8 @@ class mlir_type_subclass : public pure_subclass {
mlir_type_subclass(nanobind::handle scope, const char *typeClassName,
IsAFunctionTy isaFunction,
const nanobind::object &superCls,
- GetTypeIDFunctionTy getTypeIDFunction = nullptr)
+ GetTypeIDFunctionTy getTypeIDFunction = nullptr,
+ const nanobind::module_ *mlirIrModule = nullptr)
: pure_subclass(scope, typeClassName, superCls) {
// Casting constructor. Note that it is hard, if not impossible, to
properly
// call chain to parent `__init__` in nanobind due to its special handling
@@ -672,9 +682,18 @@ class mlir_type_subclass : public pure_subclass {
nanobind::sig("def get_static_typeid() -> "
MAKE_MLIR_PYTHON_QUALNAME("ir.TypeID"))
// clang-format on
);
- nanobind::module_::import_(MAKE_MLIR_PYTHON_QUALNAME("ir"))
- .attr(MLIR_PYTHON_CAPI_TYPE_CASTER_REGISTER_ATTR)(
- getTypeIDFunction())(nanobind::cpp_function(
+
+ // Directly call the caster implementation given the "ir" module,
+ // otherwise it may trigger recursive import as the default caster
+ // attempts to import the "ir" module.
+ MlirTypeID typeID = getTypeIDFunction();
+ mlirIrModule = mlirIrModule ? mlirIrModule : &irModule();
+ nanobind::handle pyTypeID =
+ nanobind::detail::type_caster::from_cpp_given_module(
+ typeID, *mlirIrModule);
+
+ mlirIrModule->attr(MLIR_PYTHON_CAPI_TYPE_CA
[llvm-branch-commits] [llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)
@@ -0,0 +1,46 @@
+# This test checks that BOLT can generate BTI landing pads for targets of stubs inserted in LongJmp.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %s %cflags -Wl,-q -o %t -mbranch-protection=bti -Wl,-z,force-bti
+# RUN: link_fdata --no-lbr %s %t %t.fdata
+# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions \
+# RUN:   --print-split --print-only foo --print-longjmp 2>&1 | FileCheck %s
+
+#CHECK: BOLT-INFO: Starting stub-insertion pass
+#CHECK: Binary Function "foo" after long-jmp
+
+#CHECK: cmp x0, #0x0
+#CHECK-NEXT: Successors: .LStub0
+
+#CHECK: adrp x16, .Ltmp0
+#CHECK-NEXT: add x16, x16, :lo12:.Ltmp0
+#CHECK-NEXT: br x16 # UNKNOWN CONTROL FLOW
+
+#CHECK: --- HOT-COLD SPLIT POINT ---
+
+#CHECK: bti c
+#CHECK-NEXT: mov x0, #0x2
+#CHECK-NEXT: ret
bgergely0 wrote:
this is about the space between # and CHECK, right?
https://github.com/llvm/llvm-project/pull/171149
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [WebAssembly] Remove FAKE_USEs before ExplicitLocals (#160768) (PR #171184)
llvmbot wrote:
@llvm/pr-subscribers-backend-webassembly
Author: None (llvmbot)
Changes
Backport e5b2a06546eb20662156b8a59b77aca086301486
Requested by: @dschuff
---
Full diff: https://github.com/llvm/llvm-project/pull/171184.diff
3 Files Affected:
- (modified) llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp (+14)
- (modified) llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp (+4)
- (added) llvm/test/CodeGen/WebAssembly/fake-use.ll (+25)
```diff
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index e6486e247209b..5c3127e2d3dc6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -216,6 +216,18 @@ static MachineInstr *findStartOfTree(MachineOperand &MO,
return Def;
}
+// FAKE_USEs are no-ops, so remove them here so that the values used by them
+// will be correctly dropped later.
+static void removeFakeUses(MachineFunction &MF) {
+  SmallVector<MachineInstr *> ToDelete;
+  for (auto &MBB : MF)
+    for (auto &MI : MBB)
+      if (MI.isFakeUse())
+        ToDelete.push_back(&MI);
+  for (auto *MI : ToDelete)
+    MI->eraseFromParent();
+}
+
 bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
   LLVM_DEBUG(dbgs() << "********** Make Locals Explicit **********\n"
                        "********** Function: "
@@ -226,6 +238,8 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
   WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
   const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  removeFakeUses(MF);
+
   // Map non-stackified virtual registers to their local ids.
   DenseMap<unsigned, unsigned> Reg2Local;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index bc91c6424b63e..fd13ef9a1921d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -866,6 +866,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
       if (Insert->isDebugValue())
         continue;
+      // Ignore FAKE_USEs, which are no-ops and will be deleted later.
+      if (Insert->isFakeUse())
+        continue;
+
       // Iterate through the inputs in reverse order, since we'll be pulling
       // operands off the stack in LIFO order.
       CommutingState Commuting;
diff --git a/llvm/test/CodeGen/WebAssembly/fake-use.ll
b/llvm/test/CodeGen/WebAssembly/fake-use.ll
new file mode 100644
index 0..a18ce33566df0
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/fake-use.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s | llvm-mc -triple=wasm32-unknown-unknown
+
+target triple = "wasm32-unknown-unknown"
+
+define void @fake_use() {
+  %t = call i32 @foo()
+  tail call void (...) @llvm.fake.use(i32 %t)
+  ret void
+}
+
+; %t shouldn't be converted to TEE in RegStackify, because the FAKE_USE will be
+; deleted in the beginning of ExplicitLocals.
+define void @fake_use_no_tee() {
+  %t = call i32 @foo()
+  tail call void (...) @llvm.fake.use(i32 %t)
+  call void @use(i32 %t)
+  ret void
+}
+
+declare i32 @foo()
+declare void @use(i32 %t)
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn
memory(inaccessiblemem: readwrite)
+declare void @llvm.fake.use(...) #0
+
+attributes #0 = { mustprogress nocallback nofree nosync nounwind willreturn
memory(inaccessiblemem: readwrite) }
```
https://github.com/llvm/llvm-project/pull/171184
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use different name scope for MIMGEncoding and MIMGDim (PR #171166)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/171166 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][AArch64] Replace R_AARCH64_AUTH_ABS64 addend hack (PR #171192)
@@ -585,7 +585,7 @@ struct RelativeReloc {
return inputSec->getVA(inputSec->relocs()[relocIdx].offset);
}
- const InputSectionBase *inputSec;
+ InputSectionBase *inputSec;
jrtc27 wrote:
Could const_cast instead in finalizeAddressDependentContent, whichever's deemed
better for this case (note that *inputSec is const in DynamicReloc, so removing
it here creates disparity)
https://github.com/llvm/llvm-project/pull/171192
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
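For readers unfamiliar with the pattern being weighed here, a generic self-contained sketch (not LLD code; all names invented) of the const_cast alternative mentioned above: keep the stored pointer const and cast constness away only at the single mutation site.
```cpp
#include <cstddef>
#include <vector>

struct Reloc { int expr = 0; };
struct Section { std::vector<Reloc> relocs; };

struct RelativeReloc {
  const Section *sec; // pointer stays const in the struct...
  size_t idx;
};

void markDropped(const RelativeReloc &elem) {
  // ...and constness is cast away only where the one mutation happens,
  // mirroring the alternative suggested in the review comment.
  const_cast<Section *>(elem.sec)->relocs[elem.idx].expr = -1;
}
```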
[llvm-branch-commits] [NFCI][AArch64] Replace R_AARCH64_AUTH_ABS64 addend hack (PR #171192)
llvmbot wrote:
@llvm/pr-subscribers-lld
Author: Jessica Clarke (jrtc27)
Changes
Rather than trying to infer deep down in AArch64::relocate whether we
need to actually write anything or not, we should instead mark the
relocations that we no longer want so we don't actually apply them. This
is similar to how X86_64::deleteFallThruJmpInsn works, although given
the target is still valid we don't need to mess with the offset, just
the expr.
---
Full diff: https://github.com/llvm/llvm-project/pull/171192.diff
3 Files Affected:
- (modified) lld/ELF/Arch/AArch64.cpp (+7-11)
- (modified) lld/ELF/SyntheticSections.h (+1-1)
- (modified) lld/ELF/Writer.cpp (+2-1)
```diff
diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp
index f68403b69419f..34cca88ae63b0 100644
--- a/lld/ELF/Arch/AArch64.cpp
+++ b/lld/ELF/Arch/AArch64.cpp
@@ -533,17 +533,11 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel,
     write64(ctx, loc, val);
     break;
   case R_AARCH64_AUTH_ABS64:
-    // If val is wider than 32 bits, the relocation must have been moved from
-    // .relr.auth.dyn to .rela.dyn, and the addend write is not needed.
-    //
-    // If val fits in 32 bits, we have two potential scenarios:
-    // * True RELR: Write the 32-bit `val`.
-    // * RELA: Even if the value now fits in 32 bits, it might have been
-    //   converted from RELR during an iteration in
-    //   finalizeAddressDependentContent(). Writing the value is harmless
-    //   because dynamic linking ignores it.
-    if (isInt<32>(val))
-      write32(ctx, loc, val);
+    // This is used for the addend of a .relr.auth.dyn entry,
+    // which is a 32-bit value; the upper 32 bits are used to
+    // encode the schema.
+    checkInt(ctx, loc, val, 32, rel);
+    write32(ctx, loc, val);
     break;
   case R_AARCH64_ADD_ABS_LO12_NC:
   case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
@@ -935,6 +929,8 @@ void AArch64::relocateAlloc(InputSection &sec, uint8_t *buf) const {
AArch64Relaxer relaxer(ctx, sec.relocs());
for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) {
const Relocation &rel = sec.relocs()[i];
+if (rel.expr == R_NONE) // See finalizeAddressDependentContent()
+ continue;
uint8_t *loc = buf + rel.offset;
const uint64_t val = sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset);
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 72711aa75aec9..2b5897c9a40b0 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -585,7 +585,7 @@ struct RelativeReloc {
return inputSec->getVA(inputSec->relocs()[relocIdx].offset);
}
- const InputSectionBase *inputSec;
+ InputSectionBase *inputSec;
size_t relocIdx;
};
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 083b4fb1dbd22..db5626e701ad6 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1583,9 +1583,10 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
if (part.relrAuthDyn) {
auto it = llvm::remove_if(
part.relrAuthDyn->relocs, [this, &part](const RelativeReloc &elem) {
- const Relocation &reloc = elem.inputSec->relocs()[elem.relocIdx];
+ Relocation &reloc = elem.inputSec->relocs()[elem.relocIdx];
if (isInt<32>(reloc.sym->getVA(ctx, reloc.addend)))
return false;
+ reloc.expr = R_NONE;
part.relaDyn->addReloc({R_AARCH64_AUTH_RELATIVE, elem.inputSec,
reloc.offset, false, *reloc.sym,
reloc.addend, R_ABS});
``
https://github.com/llvm/llvm-project/pull/171192
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][AArch64] Replace R_AARCH64_AUTH_ABS64 addend hack (PR #171192)
llvmbot wrote:
@llvm/pr-subscribers-lld-elf
Author: Jessica Clarke (jrtc27)
https://github.com/llvm/llvm-project/pull/171192
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)
https://github.com/cmc-rep edited https://github.com/llvm/llvm-project/pull/169476 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][AArch64] Replace R_AARCH64_AUTH_ABS64 addend hack (PR #171192)
https://github.com/jrtc27 created https://github.com/llvm/llvm-project/pull/171192 Rather than trying to infer deep down in AArch64::relocate whether we need to actually write anything or not, we should instead mark the relocations that we no longer want so we don't actually apply them. This is similar to how X86_64::deleteFallThruJmpInsn works, although given the target is still valid we don't need to mess with the offset, just the expr. ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC] [FlowSensitive] Add mock unique_ptr header (PR #170942)
https://github.com/jvoung approved this pull request. https://github.com/llvm/llvm-project/pull/170942 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] Prepare libcxx and libcxxabi for pointer field protection. (PR #151651)
ojhunt wrote: > `std::trivially_relocate` and friends have been removed from C++26, and the > new equivalent of `std::trivially_relocate` in C++29 is very likely to be > based on `memcpy` or `memmove`. I am not sure how PFP can work with the > future `std::trivially_relocate` at all to be honest I'm unsure why that is being assumed here: the assertion that "trivial" means memcpy is not supported by the existing usage of trivial in the specification. It would in fact mean that the meaning of "trivial" in "trivially_relocate" is different from every other usage in the standard. https://github.com/llvm/llvm-project/pull/151651 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [CIR] Implement function personality attribute and its lowering (PR #171001)
andykaylor wrote: > What do you mean by CIRGenModule changes? I just added a missing feature test, > as `setWindowsItaniumDLLImport` is unconditionally invoked on the path for > personality function generation, but it is still not supported. Yeah, sorry for the confusion. I didn't notice that was the only change in CIRGenModule. > I believe the CIRGenException changes were already tested, I just moved > personality function generation from LowerToLLVM. The `clang/test/CIR/Lowering/eh-inflight.cir` test only covers the lowering-from-CIR case. There is no test that covers the generation of the personality attribute from C++ source. https://github.com/llvm/llvm-project/pull/171001 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Matt Arsenault (arsenm)
Changes
We have special case handling for the logb builtins, so use them.
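For intuition, a rough scalar model of the expansion the updated checks verify on the default path (hand-written C++, illustrative only; this is not the compiler's implementation):
``cpp
#include <cmath>

// logb(x) via frexp: the exponent minus 1, with explicit handling of
// the +/-inf and zero edge cases, mirroring the selects in the IR.
float logbf_model(float x) {
  int exp;
  (void)std::frexp(x, &exp);         // x == m * 2^exp with m in [0.5, 1)
  float result = static_cast<float>(exp - 1);
  if (std::isinf(x))                 // logb(+/-inf) == +inf
    result = std::fabs(x);
  if (x == 0.0f)                     // logb(+/-0) == -inf
    result = -INFINITY;
  return result;
}
``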
---
Full diff: https://github.com/llvm/llvm-project/pull/171186.diff
2 Files Affected:
- (modified) clang/lib/Headers/__clang_hip_math.h (+2-2)
- (modified) clang/test/Headers/__clang_hip_math.hip (+116-46)
``diff
diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 759e742c9d012..03c2721b4ad3c 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -498,7 +498,7 @@ __DEVICE__
float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __builtin_log2f)(__x);
}
__DEVICE__
-float logbf(float __x) { return __ocml_logb_f32(__x); }
+float logbf(float __x) { return __builtin_logbf(__x); }
__DEVICE__
float logf(float __x) { return __FAST_OR_SLOW(__logf, __builtin_logf)(__x); }
@@ -901,7 +901,7 @@ __DEVICE__
double log2(double __x) { return __ocml_log2_f64(__x); }
__DEVICE__
-double logb(double __x) { return __ocml_logb_f64(__x); }
+double logb(double __x) { return __builtin_logb(__x); }
__DEVICE__
long int lrint(double __x) { return __builtin_rint(__x); }
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 4163666811c91..426e5af319cbf 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -3871,69 +3871,139 @@ extern "C" __device__ double test_log2(double x) {
return log2(x);
}
-// DEFAULT-LABEL: define dso_local noundef float @test_logbf(
-// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-LABEL: define dso_local float @test_logbf(
+// DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// DEFAULT-NEXT: [[ENTRY:.*:]]
-// DEFAULT-NEXT:[[CALL_I:%.*]] = tail call contract noundef float
@__ocml_logb_f32(float noundef [[X]]) #[[ATTR13]]
-// DEFAULT-NEXT:ret float [[CALL_I]]
-//
-// FINITEONLY-LABEL: define dso_local noundef nofpclass(nan inf) float
@test_logbf(
-// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]])
local_unnamed_addr #[[ATTR4]] {
+// DEFAULT-NEXT:[[TMP0:%.*]] = tail call { float, i32 }
@llvm.frexp.f32.i32(float [[X]])
+// DEFAULT-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// DEFAULT-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// DEFAULT-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// DEFAULT-NEXT:[[TMP4:%.*]] = tail call contract float
@llvm.fabs.f32(float [[X]])
+// DEFAULT-NEXT:[[TMP5:%.*]] = fcmp contract one float [[TMP4]],
0x7FF0
+// DEFAULT-NEXT:[[TMP6:%.*]] = select contract i1 [[TMP5]], float
[[TMP3]], float [[TMP4]]
+// DEFAULT-NEXT:[[TMP7:%.*]] = fcmp contract oeq float [[X]], 0.00e+00
+// DEFAULT-NEXT:[[TMP8:%.*]] = select contract i1 [[TMP7]], float
0xFFF0, float [[TMP6]]
+// DEFAULT-NEXT:ret float [[TMP8]]
+//
+// FINITEONLY-LABEL: define dso_local nofpclass(nan inf) float @test_logbf(
+// FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]])
local_unnamed_addr #[[ATTR3]] {
// FINITEONLY-NEXT: [[ENTRY:.*:]]
-// FINITEONLY-NEXT:[[CALL_I:%.*]] = tail call nnan ninf contract noundef
nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf)
[[X]]) #[[ATTR13]]
-// FINITEONLY-NEXT:ret float [[CALL_I]]
+// FINITEONLY-NEXT:[[TMP0:%.*]] = tail call { float, i32 }
@llvm.frexp.f32.i32(float nofpclass(nan inf) [[X]])
+// FINITEONLY-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// FINITEONLY-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// FINITEONLY-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// FINITEONLY-NEXT:ret float [[TMP3]]
//
-// APPROX-LABEL: define dso_local noundef float @test_logbf(
-// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-LABEL: define dso_local float @test_logbf(
+// APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
// APPROX-NEXT: [[ENTRY:.*:]]
-// APPROX-NEXT:[[CALL_I:%.*]] = tail call contract noundef float
@__ocml_logb_f32(float noundef [[X]]) #[[ATTR13]]
-// APPROX-NEXT:ret float [[CALL_I]]
-//
-// NCRDIV-LABEL: define dso_local noundef float @test_logbf(
-// NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR4]] {
+// APPROX-NEXT:[[TMP0:%.*]] = tail call { float, i32 }
@llvm.frexp.f32.i32(float [[X]])
+// APPROX-NEXT:[[TMP1:%.*]] = extractvalue { float, i32 } [[TMP0]], 1
+// APPROX-NEXT:[[TMP2:%.*]] = add nsw i32 [[TMP1]], -1
+// APPROX-NEXT:[[TMP3:%.*]] = sitofp i32 [[TMP2]] to float
+// APPROX-NEXT:[[TMP4:%.*]] = tail call contract float
@llvm.fabs.f32(float [[X]])
+// APPROX-NEXT:[[TMP5:%.*]] = fcmp contract one float [[TMP4]],
0x7FF0
+// APPROX-NEXT:[[TMP6:%.*]] = select contract i1 [[TMP5]], float [[TMP3]],
float [[TMP4]]
+//
[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: Matt Arsenault (arsenm)
[llvm-branch-commits] [clang] clang/HIP: Avoid using ocml logb (PR #171186)
https://github.com/jhuber6 approved this pull request. https://github.com/llvm/llvm-project/pull/171186 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LTT] Add `unknown` branch weights when lowering type tests with conditional (PR #170752)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/170752
>From ea48cf14b27d5ec6d0cbe02aaa6f10f5fa9299d0 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Thu, 4 Dec 2025 13:48:43 -0800
Subject: [PATCH] [LTT] Add `unknown` branch weights when lowering type tests
with conditional
---
llvm/lib/Transforms/IPO/LowerTypeTests.cpp| 6 -
llvm/test/Transforms/LowerTypeTests/import.ll | 23 +++
llvm/utils/profcheck-xfail.txt| 2 --
3 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index f7aeda95e41b3..06deea8ba5848 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -48,12 +48,14 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -802,7 +804,9 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
return createBitSetTest(ThenB, TIL, BitOffset);
}
- IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false));
+ MDBuilder MDB(M.getContext());
+  IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false,
+                                              MDB.createLikelyBranchWeights()));
// Now that we know that the offset is in range and aligned, load the
// appropriate bit from the bitset.
diff --git a/llvm/test/Transforms/LowerTypeTests/import.ll b/llvm/test/Transforms/LowerTypeTests/import.ll
index e3c2d8a3d3e8c..1583dda58cddc 100644
--- a/llvm/test/Transforms/LowerTypeTests/import.ll
+++ b/llvm/test/Transforms/LowerTypeTests/import.ll
@@ -92,7 +92,7 @@ define i1 @bytearray7(ptr %p) {
; X86-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr
@__typeid_bytearray7_global_addr to i64), [[TMP1]]
; X86-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64
[[TMP2]], i64 ptrtoint (ptr @__typeid_bytearray7_align to i64))
; X86-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], ptrtoint (ptr
@__typeid_bytearray7_size_m1 to i64)
-; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof
[[PROF6:![0-9]+]]
; X86: 5:
; X86-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr
@__typeid_bytearray7_byte_array, i64 [[TMP3]]
; X86-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -109,7 +109,7 @@ define i1 @bytearray7(ptr %p) {
; ARM-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr
@__typeid_bytearray7_global_addr to i64), [[TMP1]]
; ARM-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64
[[TMP2]], i64 3)
; ARM-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], 43
-; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof
[[PROF0:![0-9]+]]
; ARM: 5:
; ARM-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr
@__typeid_bytearray7_byte_array, i64 [[TMP3]]
; ARM-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -131,7 +131,7 @@ define i1 @bytearray32(ptr %p) {
; X86-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr
@__typeid_bytearray32_global_addr to i64), [[TMP1]]
; X86-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64
[[TMP2]], i64 ptrtoint (ptr @__typeid_bytearray32_align to i64))
; X86-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], ptrtoint (ptr
@__typeid_bytearray32_size_m1 to i64)
-; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; X86-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof
[[PROF6]]
; X86: 5:
; X86-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr
@__typeid_bytearray32_byte_array, i64 [[TMP3]]
; X86-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -148,7 +148,7 @@ define i1 @bytearray32(ptr %p) {
; ARM-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr
@__typeid_bytearray32_global_addr to i64), [[TMP1]]
; ARM-NEXT:[[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP2]], i64
[[TMP2]], i64 4)
; ARM-NEXT:[[TMP4:%.*]] = icmp ule i64 [[TMP3]], 12346
-; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]]
+; ARM-NEXT:br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP10:%.*]], !prof
[[PROF0]]
; ARM: 5:
; ARM-NEXT:[[TMP6:%.*]] = getelementptr i8, ptr
@__typeid_bytearray32_byte_array, i64 [[TMP3]]
; ARM-NEXT:[[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
@@ -170,7 +170,7 @@ define i1 @inline5(ptr %p) {
; X86-NEXT:[[TMP2:%.*]] = sub i64 ptrtoint (ptr
@__t
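The functional change in the LowerTypeTests.cpp hunk above, as a compact sketch (identifiers are as in the patch):
``cpp
// Before: the branch created by the split carried no profile metadata.
//   IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false));
// After: annotate the in-range path as likely taken.
MDBuilder MDB(M.getContext());
IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI,
                                            /*Unreachable=*/false,
                                            MDB.createLikelyBranchWeights()));
``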
[llvm-branch-commits] [llvm] release/21.x: [WebAssembly] Remove FAKE_USEs before ExplicitLocals (#160768) (PR #171184)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/171184 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)
@@ -0,0 +1,425 @@
+//===-- AMDGPUMachineLevelInliner.cpp - AMDGPU Machine Level Inliner -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMachineLevelInliner.h"
+#include "AMDGPU.h"
+#include "AMDGPUMachineModuleInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManagers.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-machine-level-inliner"
+
+namespace {
+class AMDGPUInliningPassManager : public FPPassManager {
+public:
+ static char ID;
+
+ explicit AMDGPUInliningPassManager() : FPPassManager(ID) {}
+
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool doFinalization(Module &M) override;
+
+ StringRef getPassName() const override {
+return "AMDGPU Inlining Pass Manager";
+ }
+};
+
+/// AMDGPUInliningAnchor - A machine function pass that serves as an anchor for
+/// setting up the AMDGPU inlining pass manager infrastructure. It makes sure
+/// the inliner is run via an AMDGPUInliningPassManager. It can be run well in
+/// advance of the inliner as long as there are only FunctionPasses in between.
+class AMDGPUInliningAnchor : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification
+
+ AMDGPUInliningAnchor() : MachineFunctionPass(ID) {}
+
+ // We don't really need to process any functions here.
+ bool runOnMachineFunction(MachineFunction &MF) override { return false; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ StringRef getPassName() const override;
+
+ /// Prepare the pass manager stack for the inliner. This will push an
+ /// `AMDGPUInliningPassManager` onto the stack.
+ void preparePassManager(PMStack &Stack) override;
+};
+
+} // end anonymous namespace.
+
+// Pass identification
+char AMDGPUMachineLevelInliner::ID = 0;
+char AMDGPUInliningPassManager::ID = 0;
+char AMDGPUInliningAnchor::ID = 0;
+
+char &llvm::AMDGPUMachineLevelInlinerID = AMDGPUMachineLevelInliner::ID;
+char &llvm::AMDGPUInliningAnchorID = AMDGPUInliningAnchor::ID;
+
+INITIALIZE_PASS_BEGIN(AMDGPUMachineLevelInliner, DEBUG_TYPE,
+ "AMDGPU Machine Level Inliner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AMDGPUInliningAnchor)
+INITIALIZE_PASS_END(AMDGPUMachineLevelInliner, DEBUG_TYPE,
+"AMDGPU Machine Level Inliner", false, false)
+
+INITIALIZE_PASS_BEGIN(AMDGPUInliningAnchor, "amdgpu-inlining-anchor",
+ "AMDGPU Inlining Anchor", false, true)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(AMDGPUInliningAnchor, "amdgpu-inlining-anchor",
+"AMDGPU Inlining Anchor", false, true)
+
+AMDGPUMachineLevelInliner::AMDGPUMachineLevelInliner()
+: MachineFunctionPass(ID) {
+ initializeAMDGPUMachineLevelInlinerPass(*PassRegistry::getPassRegistry());
+}
+
+void AMDGPUMachineLevelInliner::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired();
+ AU.addRequired();
+ AU.addPreserved();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool AMDGPUMachineLevelInliner::runOnMachineFunction(MachineFunction &MF) {
+  MachineModuleInfo &MMI =
+      getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+
+ Function &F = MF.getFunction();
+ if (shouldInlineCallsTo(F)) {
+// Mark the function as machine-inlined in AMDGPUMachineModuleInfo. This
+// tells the inlining pass manager to stop processing it.
+    auto &AMMMI = MMI.getObjFileInfo<AMDGPUMachineModuleInfo>();
+AMMMI.addMachineInlinedFunction(F);
+
+return false;
+ }
+
+ bool Changed = false;
+
+ // Can't inline anything if there aren't any calls.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasCalls() && !MFI.hasTailCall())
+return false;
+
+ // Collect calls to inline.
+ SmallVector CallsToInline;
+ const SIInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+ for (auto &MBB : MF) {
+for (auto &MI : MBB) {
+ if (!MI.isCall())
+continue;
+
+ const MachineOperand *CalleeOp =
+ TII->getNamedOperand(MI,
[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)
@@ -0,0 +1,59 @@ +//===-- AMDGPUMachineLevelInliner.h - AMDGPU Machine Level Inliner -*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// This file defines the AMDGPUMachineLevelInliner pass, which performs +// machine-level inlining for AMDGPU targets. shiltian wrote: Is this something that can be generic? https://github.com/llvm/llvm-project/pull/169476 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add machine-level inliner pass (PR #169476)
https://github.com/shiltian edited https://github.com/llvm/llvm-project/pull/169476 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [FlowSensitive] [StatusOr] [12/N] Add support for smart pointers (PR #170943)
https://github.com/jvoung approved this pull request. https://github.com/llvm/llvm-project/pull/170943 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF] Move mergeRels/partitionRels into finalizeContents (PR #171203)
llvmbot wrote:
@llvm/pr-subscribers-lld
Author: Jessica Clarke (jrtc27)
Changes
Other than the ordering requirements that remain between sections, this
abstracts the details of how these sections are implemented.
Note that isNeeded already checks relocsVec for both section types, so
finalizeSynthetic can call it before mergeRels just fine.
---
Full diff: https://github.com/llvm/llvm-project/pull/171203.diff
3 Files Affected:
- (modified) lld/ELF/SyntheticSections.cpp (+5)
- (modified) lld/ELF/SyntheticSections.h (+4-3)
- (modified) lld/ELF/Writer.cpp (+3-14)
``diff
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index d6e214f9d0f48..c81f649861a73 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1707,6 +1707,9 @@ void RelocationBaseSection::partitionRels() {
}
void RelocationBaseSection::finalizeContents() {
+ mergeRels();
+ // Compute DT_RELACOUNT to be used by part.dynamic.
+ partitionRels();
SymbolTableBaseSection *symTab = getPartition(ctx).dynSymTab.get();
// When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE
@@ -1796,6 +1799,8 @@ void RelrBaseSection::mergeRels() {
relocsVec.clear();
}
+void RelrBaseSection::finalizeContents() { mergeRels(); }
+
template <class ELFT>
AndroidPackedRelocationSection<ELFT>::AndroidPackedRelocationSection(
Ctx &ctx, StringRef name, unsigned concurrency)
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 2b5897c9a40b0..8971d5f6c2349 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -530,14 +530,14 @@ class RelocationBaseSection : public SyntheticSection {
}
size_t getSize() const override { return relocs.size() * this->entsize; }
size_t getRelativeRelocCount() const { return numRelativeRelocs; }
- void mergeRels();
- void partitionRels();
void finalizeContents() override;
int32_t dynamicTag, sizeDynamicTag;
SmallVector relocs;
protected:
+ void mergeRels();
+ void partitionRels();
void computeRels();
// Used when parallel relocation scanning adds relocations. The elements
// will be moved into relocs by mergeRel().
@@ -608,14 +608,15 @@ class RelrBaseSection : public SyntheticSection {
isec.addReloc({expr, addendRelType, offsetInSec, addend, &sym});
addReloc({&isec, isec.relocs().size() - 1});
}
- void mergeRels();
bool isNeeded() const override {
return !relocs.empty() ||
llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
}
+ void finalizeContents() override;
SmallVector relocs;
protected:
+ void mergeRels();
SmallVector, 0> relocsVec;
};
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index db5626e701ad6..57202f42cce5b 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -2111,20 +2111,9 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
// Dynamic section must be the last one in this list and dynamic
// symbol table section (dynSymTab) must be the first one.
for (Partition &part : ctx.partitions) {
- if (part.relaDyn) {
-part.relaDyn->mergeRels();
-// Compute DT_RELACOUNT to be used by part.dynamic.
-part.relaDyn->partitionRels();
-finalizeSynthetic(ctx, part.relaDyn.get());
- }
- if (part.relrDyn) {
-part.relrDyn->mergeRels();
-finalizeSynthetic(ctx, part.relrDyn.get());
- }
- if (part.relrAuthDyn) {
-part.relrAuthDyn->mergeRels();
-finalizeSynthetic(ctx, part.relrAuthDyn.get());
- }
+ finalizeSynthetic(ctx, part.relaDyn.get());
+ finalizeSynthetic(ctx, part.relrDyn.get());
+ finalizeSynthetic(ctx, part.relrAuthDyn.get());
finalizeSynthetic(ctx, part.dynSymTab.get());
finalizeSynthetic(ctx, part.gnuHashTab.get());
``
https://github.com/llvm/llvm-project/pull/171203
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF] Move mergeRels/partitionRels into finalizeContents (PR #171203)
llvmbot wrote:
@llvm/pr-subscribers-lld-elf
Author: Jessica Clarke (jrtc27)
https://github.com/llvm/llvm-project/pull/171203
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF] Move mergeRels/partitionRels into finalizeContents (PR #171203)
https://github.com/jrtc27 created https://github.com/llvm/llvm-project/pull/171203 Other than the ordering requirements that remain between sections, this abstracts the details of how these sections are implemented. Note that isNeeded already checks relocsVec for both section types, so finalizeSynthetic can call it before mergeRels just fine. ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][ELF][AArch64][PAC] Teach addRelativeReloc to emit R_AARCH64_AUTH_RELATIVE (PR #171180)
https://github.com/jrtc27 edited https://github.com/llvm/llvm-project/pull/171180 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF][AArch64][PAC] Use existing addSymbolReloc for R_AARCH64_AUTH_ABS64 (PR #171179)
https://github.com/jrtc27 edited https://github.com/llvm/llvm-project/pull/171179 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [ELF][AArch64][PAC] Replace R_AARCH64_AUTH_ABS64 addend hack (PR #171192)
https://github.com/jrtc27 edited https://github.com/llvm/llvm-project/pull/171192 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF][AArch64][MTE] Replace addend hack with less-confusing code (PR #171182)
https://github.com/jrtc27 edited https://github.com/llvm/llvm-project/pull/171182 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF][AArch64][MTE] Don't duplicate addRelativeReloc call for MTE globals (PR #171181)
https://github.com/jrtc27 edited https://github.com/llvm/llvm-project/pull/171181 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] VectorCombine: Improve the insert/extract fold in the narrowing case (PR #168820)
https://github.com/nhaehnle updated
https://github.com/llvm/llvm-project/pull/168820
From 2ee921f4d07bd5b457d5e79a848ab6183c3a8c52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?=
Date: Wed, 19 Nov 2025 18:00:32 -0800
Subject: [PATCH] VectorCombine: Improve the insert/extract fold in the
narrowing case
Keeping the extracted element in a natural position in the narrowed
vector has two beneficial effects:
1. It makes the narrowing shuffles cheaper (at least on AMDGPU), which
allows the insert/extract fold to trigger.
2. It makes the narrowing shuffles in a chain of extract/insert
compatible, which allows foldLengthChangingShuffles to successfully
recognize a chain that can be folded.
There are minor X86 test changes that look reasonable to me. The IR
change for AVX2 in
llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
doesn't change the assembly generated by `llc -mtriple=x86_64-- -mattr=AVX2`
at all.
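As a concrete instance (values taken from the AMDGPU test this patch updates; the fragment itself is illustrative and assumes the surrounding LLVM context): extracting element 17 of a <32 x i8> and inserting it at lane 1 of an <8 x i8> now keeps the element at lane 17 % 8 == 1 of the narrowed vector.
``cpp
unsigned NumDstElts = 8;  // destination type is <8 x i8>
unsigned ExtIdx = 17;     // element extracted from the <32 x i8> source
SmallVector<int> ExtToVecMask(NumDstElts, PoisonMaskElem);
ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx; // lane 1 <- source element 17
// Previously, any ExtIdx >= NumDstElts was forced into lane 0, which made
// consecutive narrowing shuffles in a chain incompatible with each other.
``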
commit-id:c151bb04
---
.../Transforms/Vectorize/VectorCombine.cpp| 22 +--
.../VectorCombine/AMDGPU/extract-insert-i8.ll | 18 ++-
.../X86/extract-insert-poison.ll | 12 ++
.../VectorCombine/X86/extract-insert.ll | 8 +++
.../Transforms/VectorCombine/X86/pr126085.ll | 4 ++--
5 files changed, 22 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b83597fec021a..4b081205eba10 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -4558,22 +4558,15 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
SmallVector<int> Mask(NumDstElts, PoisonMaskElem);
bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
- bool IsExtIdxInBounds = ExtIdx < NumDstElts;
bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec);
if (NeedDstSrcSwap) {
SK = TargetTransformInfo::SK_PermuteSingleSrc;
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
- Mask[InsIdx] = 0;
-else
- Mask[InsIdx] = ExtIdx;
+Mask[InsIdx] = ExtIdx % NumDstElts;
std::swap(DstVec, SrcVec);
} else {
SK = TargetTransformInfo::SK_PermuteTwoSrc;
std::iota(Mask.begin(), Mask.end(), 0);
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
- Mask[InsIdx] = NumDstElts;
-else
- Mask[InsIdx] = ExtIdx + NumDstElts;
+Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
}
// Cost
@@ -4594,14 +4587,11 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
NewCost += TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind, 0,
nullptr, {DstVec, SrcVec});
} else {
-// When creating length-changing-vector, always create with a Mask whose
-// first element has an ExtIdx, so that the first element of the vector
-// being created is always the target to be extracted.
+// When creating a length-changing-vector, always try to keep the relevant
+// element in an equivalent position, so that bulk shuffles are more likely
+// to be useful.
ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
-if (IsExtIdxInBounds)
- ExtToVecMask[ExtIdx] = ExtIdx;
-else
- ExtToVecMask[0] = ExtIdx;
+ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
// Add cost for expanding or narrowing
NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
index 8c2455dd9d375..6c92892949175 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
@@ -88,22 +88,8 @@ entry:
define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) {
; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening(
; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:[[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17
-; OPT-NEXT:[[I_2:%.*]] = extractelement <32 x i8> [[IN]], i64 18
-; OPT-NEXT:[[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19
-; OPT-NEXT:[[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21
-; OPT-NEXT:[[I_6:%.*]] = extractelement <32 x i8> [[IN]], i64 22
-; OPT-NEXT:[[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23
-; OPT-NEXT:[[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison,
<8 x i32>
-; OPT-NEXT:[[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1
-; OPT-NEXT:[[O_2:%.*]] = insertelement <8 x i8> [[O_1]], i8 [[I_2]], i32 2
-; OPT-NEXT:[[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3
-; OPT-NEXT:[[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8>
poison, <8 x i32>
-; OPT-NEXT:[[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8>
[[TMP1]], <
[llvm-branch-commits] [llvm] VectorCombine: Fold chains of shuffles fed by length-changing shuffles (PR #168819)
https://github.com/nhaehnle updated
https://github.com/llvm/llvm-project/pull/168819
From 316715e02c8ebdc5014f5666e62738d0367c853d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?=
Date: Wed, 19 Nov 2025 17:59:11 -0800
Subject: [PATCH] VectorCombine: Fold chains of shuffles fed by length-changing
shuffles
Such chains can arise from folding insert/extract chains.
commit-id:a960175d
---
.../Transforms/Vectorize/VectorCombine.cpp| 192 ++
.../VectorCombine/AMDGPU/extract-insert-i8.ll | 36 +---
.../shuffles-of-length-changing-shuffles.ll | 12 +-
3 files changed, 200 insertions(+), 40 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 243f685cf25e2..b83597fec021a 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -140,6 +140,7 @@ class VectorCombine {
bool foldShuffleOfCastops(Instruction &I);
bool foldShuffleOfShuffles(Instruction &I);
bool foldPermuteOfIntrinsic(Instruction &I);
+ bool foldShufflesOfLengthChangingShuffles(Instruction &I);
bool foldShuffleOfIntrinsics(Instruction &I);
bool foldShuffleToIdentity(Instruction &I);
bool foldShuffleFromReductions(Instruction &I);
@@ -2878,6 +2879,195 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
return true;
}
+/// Try to convert a chain of length-preserving shuffles that are fed by
+/// length-changing shuffles from the same source, e.g. a chain of length 3:
+///
+/// "shuffle (shuffle (shuffle x, (shuffle y, undef)),
+/// (shuffle y, undef)),
+/// (shuffle y, undef)"
+///
+/// into a single shuffle fed by a length-changing shuffle:
+///
+/// "shuffle x, (shuffle y, undef)"
+///
+/// Such chains arise e.g. from folding extract/insert sequences.
+bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &I) {
+  FixedVectorType *TrunkType = dyn_cast<FixedVectorType>(I.getType());
+ if (!TrunkType)
+return false;
+
+ unsigned ChainLength = 0;
+  SmallVector<int> Mask;
+  SmallVector<int> YMask;
+ InstructionCost OldCost = 0;
+ InstructionCost NewCost = 0;
+ Value *Trunk = &I;
+ unsigned NumTrunkElts = TrunkType->getNumElements();
+ Value *Y = nullptr;
+
+ for (;;) {
+// Match the current trunk against (commutations of) the pattern
+// "shuffle trunk', (shuffle y, undef)"
+    ArrayRef<int> OuterMask;
+Value *OuterV0, *OuterV1;
+if (ChainLength != 0 && !Trunk->hasOneUse())
+ break;
+if (!match(Trunk, m_Shuffle(m_Value(OuterV0), m_Value(OuterV1),
+                                m_Mask(OuterMask))))
+ break;
+if (OuterV0->getType() != TrunkType) {
+ // This shuffle is not length-preserving, so it cannot be part of the
+ // chain.
+ break;
+}
+
+    ArrayRef<int> InnerMask0, InnerMask1;
+Value *A0, *A1, *B0, *B1;
+bool Match0 =
+match(OuterV0, m_Shuffle(m_Value(A0), m_Value(B0),
m_Mask(InnerMask0)));
+bool Match1 =
+match(OuterV1, m_Shuffle(m_Value(A1), m_Value(B1),
m_Mask(InnerMask1)));
+bool Match0Leaf = Match0 && A0->getType() != I.getType();
+bool Match1Leaf = Match1 && A1->getType() != I.getType();
+if (Match0Leaf == Match1Leaf) {
+ // Only handle the case of exactly one leaf in each step. The "two
leaves"
+ // case is handled by foldShuffleOfShuffles.
+ break;
+}
+
+SmallVector<int> CommutedOuterMask;
+if (Match0Leaf) {
+ std::swap(OuterV0, OuterV1);
+ std::swap(InnerMask0, InnerMask1);
+ std::swap(A0, A1);
+ std::swap(B0, B1);
+ llvm::append_range(CommutedOuterMask, OuterMask);
+ for (int &M : CommutedOuterMask) {
+if (M == PoisonMaskElem)
+ continue;
+if (M < (int)NumTrunkElts)
+ M += NumTrunkElts;
+else
+ M -= NumTrunkElts;
+ }
+ OuterMask = CommutedOuterMask;
+}
+if (!OuterV1->hasOneUse())
+ break;
+
+if (!isa(A1)) {
+ if (!Y)
+Y = A1;
+ else if (Y != A1)
+break;
+}
+if (!isa(B1)) {
+ if (!Y)
+Y = B1;
+ else if (Y != B1)
+break;
+}
+
+auto *YType = cast<FixedVectorType>(A1->getType());
+int NumLeafElts = YType->getNumElements();
+SmallVector<int> LocalYMask(InnerMask1);
+for (int &M : LocalYMask) {
+ if (M >= NumLeafElts)
+M -= NumLeafElts;
+}
+
+InstructionCost LocalOldCost =
+TTI.getInstructionCost(cast<Instruction>(Trunk), CostKind) +
+TTI.getInstructionCost(cast<Instruction>(OuterV1), CostKind);
+
+// Handle the initial (start of chain) case.
+if (!ChainLength) {
+ Mask.assign(OuterMask);
+ YMask.assign(LocalYMask);
+ OldCost = NewCost = LocalOldCost;
+ Trunk = OuterV0;
+ ChainLength++;
+ continue;
+}
+
+// For the non-root case, first attempt to combine masks.
+SmallVector<int> NewYMask(YMask);
+bool Valid = true;
+for (auto
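For readers following along: the core identity behind this fold is that two length-preserving shuffles compose by indexing one mask through the other. Below is a minimal, hand-written sketch (not taken from the patch; `composeMasks` and its types are invented for illustration):
```cpp
#include <cassert>
#include <cstdio>
#include <vector>

constexpr int PoisonMaskElem = -1;

// Compose two length-preserving shuffle masks: applying Inner and then
// Outer equals a single shuffle with Composed.
std::vector<int> composeMasks(const std::vector<int> &Inner,
                              const std::vector<int> &Outer) {
  std::vector<int> Composed(Outer.size(), PoisonMaskElem);
  for (size_t I = 0; I != Outer.size(); ++I) {
    int M = Outer[I];
    if (M == PoisonMaskElem)
      continue; // poison lanes stay poison
    assert(M >= 0 && (size_t)M < Inner.size() && "mask index out of range");
    Composed[I] = Inner[M]; // may itself be poison
  }
  return Composed;
}

int main() {
  std::vector<int> Inner = {3, 2, 1, 0}; // reverse 4 lanes
  std::vector<int> Outer = {2, 3, 0, 1}; // swap halves
  for (int M : composeMasks(Inner, Outer))
    std::printf("%d ", M); // prints: 1 0 3 2
  std::printf("\n");
}
```
The patch's loop performs this composition step by step along the chain while also accumulating the leaf mask `YMask` for the length-changing operand and comparing old versus new cost via TTI.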
[llvm-branch-commits] [llvm] VectorCombine: Improve the insert/extract fold in the narrowing case (PR #168820)
https://github.com/nhaehnle updated
https://github.com/llvm/llvm-project/pull/168820
From 2ee921f4d07bd5b457d5e79a848ab6183c3a8c52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?=
Date: Wed, 19 Nov 2025 18:00:32 -0800
Subject: [PATCH] VectorCombine: Improve the insert/extract fold in the
narrowing case
Keeping the extracted element in a natural position in the narrowed
vector has two beneficial effects:
1. It makes the narrowing shuffles cheaper (at least on AMDGPU), which
allows the insert/extract fold to trigger.
2. It makes the narrowing shuffles in a chain of extract/insert
compatible, which allows foldLengthChangingShuffles to successfully
recognize a chain that can be folded.
There are minor X86 test changes that look reasonable to me. The IR
change for AVX2 in
llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
doesn't change the assembly generated by `llc -mtriple=x86_64-- -mattr=AVX2`
at all.
commit-id:c151bb04
---
.../Transforms/Vectorize/VectorCombine.cpp| 22 +--
.../VectorCombine/AMDGPU/extract-insert-i8.ll | 18 ++-
.../X86/extract-insert-poison.ll | 12 ++
.../VectorCombine/X86/extract-insert.ll | 8 +++
.../Transforms/VectorCombine/X86/pr126085.ll | 4 ++--
5 files changed, 22 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b83597fec021a..4b081205eba10 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -4558,22 +4558,15 @@ bool
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
SmallVector<int> Mask(NumDstElts, PoisonMaskElem);
bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
- bool IsExtIdxInBounds = ExtIdx < NumDstElts;
bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec);
if (NeedDstSrcSwap) {
SK = TargetTransformInfo::SK_PermuteSingleSrc;
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
- Mask[InsIdx] = 0;
-else
- Mask[InsIdx] = ExtIdx;
+Mask[InsIdx] = ExtIdx % NumDstElts;
std::swap(DstVec, SrcVec);
} else {
SK = TargetTransformInfo::SK_PermuteTwoSrc;
std::iota(Mask.begin(), Mask.end(), 0);
-if (!IsExtIdxInBounds && NeedExpOrNarrow)
- Mask[InsIdx] = NumDstElts;
-else
- Mask[InsIdx] = ExtIdx + NumDstElts;
+Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
}
// Cost
@@ -4594,14 +4587,11 @@ bool
VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
NewCost += TTI.getShuffleCost(SK, DstVecTy, DstVecTy, Mask, CostKind, 0,
nullptr, {DstVec, SrcVec});
} else {
-// When creating length-changing-vector, always create with a Mask whose
-// first element has an ExtIdx, so that the first element of the vector
-// being created is always the target to be extracted.
+// When creating a length-changing-vector, always try to keep the relevant
+// element in an equivalent position, so that bulk shuffles are more likely
+// to be useful.
ExtToVecMask.assign(NumDstElts, PoisonMaskElem);
-if (IsExtIdxInBounds)
- ExtToVecMask[ExtIdx] = ExtIdx;
-else
- ExtToVecMask[0] = ExtIdx;
+ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
// Add cost for expanding or narrowing
NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
DstVecTy, SrcVecTy, ExtToVecMask, CostKind);
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
index 8c2455dd9d375..6c92892949175 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
@@ -88,22 +88,8 @@ entry:
define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) {
; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening(
; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] {
-; OPT-NEXT:[[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17
-; OPT-NEXT:[[I_2:%.*]] = extractelement <32 x i8> [[IN]], i64 18
-; OPT-NEXT:[[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19
-; OPT-NEXT:[[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21
-; OPT-NEXT:[[I_6:%.*]] = extractelement <32 x i8> [[IN]], i64 22
-; OPT-NEXT:[[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23
-; OPT-NEXT:[[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison,
<8 x i32>
-; OPT-NEXT:[[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1
-; OPT-NEXT:[[O_2:%.*]] = insertelement <8 x i8> [[O_1]], i8 [[I_2]], i32 2
-; OPT-NEXT:[[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3
-; OPT-NEXT:[[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8>
poison, <8 x i32>
-; OPT-NEXT:[[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8>
[[TMP1]], <
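To make the mask change above concrete, here is a small standalone sketch (hand-written, not from the patch) of where the extracted element now lands in the narrowing mask, assuming a 32-element source narrowed to 8 lanes with `ExtIdx = 17`:
```cpp
#include <cstdio>
#include <vector>

constexpr int PoisonMaskElem = -1;

int main() {
  unsigned NumDstElts = 8, ExtIdx = 17; // narrow <32 x i8> to <8 x i8>
  std::vector<int> ExtToVecMask(NumDstElts, PoisonMaskElem);
  // Before this patch, an out-of-bounds ExtIdx was pinned to lane 0:
  //   ExtToVecMask[0] = ExtIdx;
  // After, the element keeps its position modulo the destination width,
  // so consecutive narrowing shuffles in a chain agree on the layout:
  ExtToVecMask[ExtIdx % NumDstElts] = (int)ExtIdx;
  for (int M : ExtToVecMask)
    std::printf("%d ", M); // prints: -1 17 -1 -1 -1 -1 -1 -1
  std::printf("\n");
}
```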
[llvm-branch-commits] [NFCI][ELF][AArch64][PAC] Teach addRelativeReloc to emit R_AARCH64_AUTH_RELATIVE (PR #171180)
@@ -704,8 +704,10 @@ static void addRelativeReloc(Ctx &ctx, InputSectionBase
&isec,
uint64_t offsetInSec, Symbol &sym, int64_t addend,
RelExpr expr, RelType type) {
Partition &part = isec.getPartition(ctx);
+ bool isAArch64Auth =
+ ctx.arg.emachine == EM_AARCH64 && type == R_AARCH64_AUTH_ABS64;
- if (sym.isTagged()) {
+ if (sym.isTagged() && !isAArch64Auth) {
kovdan01 wrote:
> but suspect that the ptrauth and memtag ABIs are currently mutually-exclusive?
@smithp35 Could you please clarify what the correct answer to the question
above is? I was only working on ptrauth and do not feel confident enough to
reason about combining memtag with ptrauth :)
https://github.com/llvm/llvm-project/pull/171180
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [TableGen] Improve error message for bad VTByHwMode in RegisterByHwMode (PR #171254)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/171254 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPU] Enable aliases in InstPrinter (PR #171265)
https://github.com/arichardson created https://github.com/llvm/llvm-project/pull/171265 After the latest change to tablegen, we now handle `RegClassByHwMode` correctly in the alias pattern output of -gen-asm-writer, so we can enable it for AMDGPU. Previously, `#define PRINT_ALIAS_INSTR` caused compilation failures due to tablegen referencing *RegClassID variables that do not exist for `RegClassByHwMode`. This causes a large number of test failures (380) so I just put this up as a draft pull request to see if it is desirable. It looks like the one test I updated uses wrong mnemonics so probably needs more work. ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPU] Enable aliases in InstPrinter (PR #171265)
arichardson wrote: Depends on https://github.com/llvm/llvm-project/pull/171264 https://github.com/llvm/llvm-project/pull/171265 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPU] Enable aliases in InstPrinter (PR #171265)
https://github.com/arichardson converted_to_draft https://github.com/llvm/llvm-project/pull/171265 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPU] Enable aliases in InstPrinter (PR #171265)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Alexander Richardson (arichardson)
Changes
After the latest change to tablegen, we now handle `RegClassByHwMode`
correctly in the alias pattern output of -gen-asm-writer, so we can
enable it for AMDGPU. Previously, `#define PRINT_ALIAS_INSTR` caused
compilation failures due to tablegen referencing *RegClassID variables
that do not exist for `RegClassByHwMode`.
This causes a large number of test failures (380) so I just put this
up as a draft pull request to see if it is desirable. It looks like the
one test I updated uses wrong mnemonics so probably needs more work.
---
Full diff: https://github.com/llvm/llvm-project/pull/171265.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3-1)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h (+5)
- (modified) llvm/test/CodeGen/AMDGPU/max.ll (+4-4)
```diff
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index b63d71dc2fde9..35a61616b0f8c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -44,7 +44,8 @@ void AMDGPUInstPrinter::printRegName(raw_ostream &OS,
MCRegister Reg) {
void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot, const MCSubtargetInfo &STI,
raw_ostream &OS) {
- printInstruction(MI, Address, STI, OS);
+ if (!PrintAliases || !printAliasInstr(MI, Address, STI, OS))
+printInstruction(MI, Address, STI, OS);
printAnnotation(OS, Annot);
}
@@ -1944,4 +1945,5 @@ void AMDGPUInstPrinter::printScaleSel(const MCInst *MI,
unsigned OpNo,
O << " scale_sel:" << formatDec(Imm);
}
+#define PRINT_ALIAS_INSTR
#include "AMDGPUGenAsmWriter.inc"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 564d6eea52328..b851615280f89 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -39,6 +39,11 @@ class AMDGPUInstPrinter : public MCInstPrinter {
raw_ostream &O, const MCRegisterInfo &MRI);
private:
+ bool printAliasInstr(const MCInst *MI, uint64_t Address,
+ const MCSubtargetInfo &STI, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpIdx, unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI, raw_ostream &OS);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/llvm/test/CodeGen/AMDGPU/max.ll b/llvm/test/CodeGen/AMDGPU/max.ll
index ac6dd30283554..e839b3dc1a916 100644
--- a/llvm/test/CodeGen/AMDGPU/max.ll
+++ b/llvm/test/CodeGen/AMDGPU/max.ll
@@ -1120,7 +1120,7 @@ define amdgpu_kernel void @test_umax_ugt_i64(ptr
addrspace(1) %out, i64 %a, i64
; SI-NEXT:s_mov_b32 s4, s0
; SI-NEXT:v_mov_b32_e32 v0, s8
; SI-NEXT:v_mov_b32_e32 v1, s9
-; SI-NEXT:v_cmp_gt_u64_e32 vcc, s[2:3], v[0:1]
+; SI-NEXT:V_CMP_GT_U64 vcc, s[2:3], v[0:1]
; SI-NEXT:s_mov_b32 s5, s1
; SI-NEXT:s_and_b64 s[0:1], vcc, exec
; SI-NEXT:s_cselect_b32 s0, s3, s9
@@ -1174,7 +1174,7 @@ define amdgpu_kernel void @test_umax_uge_i64(ptr
addrspace(1) %out, i64 %a, i64
; SI-NEXT:s_mov_b32 s4, s0
; SI-NEXT:v_mov_b32_e32 v0, s8
; SI-NEXT:v_mov_b32_e32 v1, s9
-; SI-NEXT:v_cmp_ge_u64_e32 vcc, s[2:3], v[0:1]
+; SI-NEXT:V_CMP_GE_U64 vcc, s[2:3], v[0:1]
; SI-NEXT:s_mov_b32 s5, s1
; SI-NEXT:s_and_b64 s[0:1], vcc, exec
; SI-NEXT:s_cselect_b32 s0, s3, s9
@@ -1228,7 +1228,7 @@ define amdgpu_kernel void @test_imax_sgt_i64(ptr
addrspace(1) %out, i64 %a, i64
; SI-NEXT:s_mov_b32 s4, s0
; SI-NEXT:v_mov_b32_e32 v0, s8
; SI-NEXT:v_mov_b32_e32 v1, s9
-; SI-NEXT:v_cmp_gt_i64_e32 vcc, s[2:3], v[0:1]
+; SI-NEXT:V_CMP_GT_I64 vcc, s[2:3], v[0:1]
; SI-NEXT:s_mov_b32 s5, s1
; SI-NEXT:s_and_b64 s[0:1], vcc, exec
; SI-NEXT:s_cselect_b32 s0, s3, s9
@@ -1282,7 +1282,7 @@ define amdgpu_kernel void @test_imax_sge_i64(ptr
addrspace(1) %out, i64 %a, i64
; SI-NEXT:s_mov_b32 s4, s0
; SI-NEXT:v_mov_b32_e32 v0, s8
; SI-NEXT:v_mov_b32_e32 v1, s9
-; SI-NEXT:v_cmp_ge_i64_e32 vcc, s[2:3], v[0:1]
+; SI-NEXT:V_CMP_GE_I64 vcc, s[2:3], v[0:1]
; SI-NEXT:s_mov_b32 s5, s1
; SI-NEXT:s_and_b64 s[0:1], vcc, exec
; SI-NEXT:s_cselect_b32 s0, s3, s9
```
https://github.com/llvm/llvm-project/pull/171265
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][py] partially use mlir_type_subclass for IRTypes.cpp (PR #171143)
https://github.com/makslevental requested changes to this pull request. This isn't a hard block - just a request for discussion. > This is part of a longer-term cleanup to only support one subclassing > mechanism. I had the same idea but I think we should go in the opposite direction - remove all of the `mlir_*_subclass`es and unify on `PyConcrete*`. I started that here https://github.com/llvm/llvm-project/pull/156575 but de-prioritized. If this unification is a priority, I can finish that PR this week. I don't think `mlir_*_subclass` is the way to go - they're not "real" in the sense that you're giving up everything useful about nanobind by using them - i.e., all of the convenience `def*`, type signature generation, etc. Why do you think `mlir_type_subclass` should be it instead of PyConcreteType? https://github.com/llvm/llvm-project/pull/171143 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)
https://github.com/bgergely0 created
https://github.com/llvm/llvm-project/pull/171149
This patch adds BTI landing pads to ShortJmp/LongJmp targets in the
LongJmp pass when optimizing BTI binaries.
BOLT does not have the ability to add BTI to all types of functions.
This patch aims to insert the landing pad where possible, and emit an
error where it currently is not.
BOLT cannot insert BTIs into several function "types", including:
- ignored functions,
- PLT functions,
- other functions without a CFG.
Additional context:
In #161206, BOLT gained the ability to decode the .note.gnu.property
section, and warn about lack of BTI support for BOLT. However, this
warning is misleading: the emitted binary may not need extra BTI landing
pads.
With this patch, the emitted binary will be "BTI-safe".
From b125532e23a8962b3ade994ed23b8bfcb250669e Mon Sep 17 00:00:00 2001
From: Gergely Balint
Date: Wed, 3 Sep 2025 13:29:39 +
Subject: [PATCH] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize
binary
This patch adds BTI landing pads to ShortJmp/LongJmp targets in the
LongJmp pass when optimizing BTI binaries.
BOLT does not have the ability to add BTI to all types of functions.
This patch aims to insert the landing pad where possible, and emit an
error where it currently is not.
BOLT cannot insert BTIs into several function "types", including:
- ignored functions,
- PLT functions,
- other functions without a CFG.
Additional context:
In #161206, BOLT gained the ability to decode the .note.gnu.property
section, and warn about lack of BTI support for BOLT. However, this
warning is misleading: the emitted binary may not need extra BTI landing
pads.
With this patch, the emitted binary will be "BTI-safe".
---
bolt/include/bolt/Core/BinaryBasicBlock.h | 2 +
bolt/lib/Passes/LongJmp.cpp | 53 +--
bolt/lib/Rewrite/GNUPropertyRewriter.cpp | 3 +-
bolt/test/AArch64/bti-note.test | 4 +-
bolt/test/AArch64/long-jmp-bti-ignored.s | 35 +++
bolt/test/AArch64/long-jmp-bti.s | 46
bolt/test/AArch64/no-bti-note.test| 4 +-
7 files changed, 138 insertions(+), 9 deletions(-)
create mode 100644 bolt/test/AArch64/long-jmp-bti-ignored.s
create mode 100644 bolt/test/AArch64/long-jmp-bti.s
diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h
b/bolt/include/bolt/Core/BinaryBasicBlock.h
index 629f0ce8314dc..2be30c14bf90b 100644
--- a/bolt/include/bolt/Core/BinaryBasicBlock.h
+++ b/bolt/include/bolt/Core/BinaryBasicBlock.h
@@ -890,6 +890,8 @@ class BinaryBasicBlock {
/// Needed by graph traits.
BinaryFunction *getParent() const { return getFunction(); }
+ bool hasParent() const { return getFunction() != nullptr; }
+
/// Return true if the containing function is in CFG state.
bool hasCFG() const;
diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp
index 03c1ea9d837e2..f0ef135df06d8 100644
--- a/bolt/lib/Passes/LongJmp.cpp
+++ b/bolt/lib/Passes/LongJmp.cpp
@@ -469,8 +469,8 @@ uint64_t LongJmpPass::getSymbolAddress(const BinaryContext
&BC,
}
Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
- const BinaryFunction &Func = *StubBB.getFunction();
- const BinaryContext &BC = Func.getBinaryContext();
+ BinaryFunction &Func = *StubBB.getFunction();
+ BinaryContext &BC = Func.getBinaryContext();
const int Bits = StubBits[&StubBB];
// Already working with the largest range?
if (Bits == static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8))
@@ -483,11 +483,54 @@ Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB,
bool &Modified) {
~((1ULL << (RangeSingleInstr - 1)) - 1);
const MCSymbol *RealTargetSym = BC.MIB->getTargetSymbol(*StubBB.begin());
- const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym);
+ BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym);
+ BinaryFunction *TargetFunction = BC.getFunctionForSymbol(RealTargetSym);
uint64_t TgtAddress = getSymbolAddress(BC, RealTargetSym, TgtBB);
uint64_t DotAddress = BBAddresses[&StubBB];
uint64_t PCRelTgtAddress = DotAddress > TgtAddress ? DotAddress - TgtAddress
: TgtAddress - DotAddress;
+
+ auto applyBTIFixup = [&](BinaryFunction *TargetFunction,
+ BinaryBasicBlock *RealTgtBB) {
+// TODO: add support for editing each type, and remove errors.
+if (!TargetFunction && !RealTgtBB) {
+ BC.errs() << "BOLT-ERROR: Cannot add BTI to function with symbol "
+<< RealTargetSym->getName() << "\n";
+ exit(1);
+}
+if (TargetFunction && TargetFunction->isIgnored()) {
+ BC.errs() << "BOLT-ERROR: Cannot add BTI landing pad to ignored function
"
+<< TargetFunction->getPrintName() << "\n";
+ exit(1);
+}
+if (TargetFunction && !TargetFunction->hasCFG()) {
+ auto FirstII = TargetFunction->instrs().begin();
[llvm-branch-commits] [llvm] [BOLT][BTI] Add needed BTIs in LongJmp or refuse to optimize binary (PR #171149)
bgergely0 wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.com/github/pr/llvm/llvm-project/171149). Learn more: https://graphite.dev/docs/merge-pull-requests Stack: * **#171149** 👈 (this PR) * **#167329** * **#167308** * **#167306** * **#167305** * `main` This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/ https://github.com/llvm/llvm-project/pull/171149 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [ADT] Make use of subsetOf and anyCommon methods of BitVector (NFC) (PR #170876)
https://github.com/atrosinenko updated
https://github.com/llvm/llvm-project/pull/170876
>From fb3eff2ed2cef9f334703988105ae68b8c25be12 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko
Date: Fri, 5 Dec 2025 16:08:28 +0300
Subject: [PATCH] [ADT] Make use of subsetOf and anyCommon methods of BitVector
(NFC)
Replace the code along these lines
BitVector Tmp = LHS;
Tmp &= RHS;
return Tmp.any();
and
BitVector Tmp = LHS;
Tmp.reset(RHS);
return Tmp.none();
with `LHS.anyCommon(RHS)` and `LHS.subsetOf(RHS)`, respectively, which
do not require creating a temporary BitVector and can return early.
---
bolt/include/bolt/Passes/LivenessAnalysis.h | 5 ++---
bolt/include/bolt/Passes/ReachingDefOrUse.h | 3 +--
bolt/lib/Passes/RegReAssign.cpp | 8 ++--
bolt/lib/Passes/ShrinkWrapping.cpp| 10 --
bolt/lib/Passes/StackAvailableExpressions.cpp | 3 +--
bolt/lib/Passes/TailDuplication.cpp | 8
llvm/lib/CodeGen/RDFRegisters.cpp | 6 ++
llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp | 6 ++
8 files changed, 18 insertions(+), 31 deletions(-)
diff --git a/bolt/include/bolt/Passes/LivenessAnalysis.h
b/bolt/include/bolt/Passes/LivenessAnalysis.h
index 1df1113318d0b..f4faa1dc34ecd 100644
--- a/bolt/include/bolt/Passes/LivenessAnalysis.h
+++ b/bolt/include/bolt/Passes/LivenessAnalysis.h
@@ -37,10 +37,9 @@ class LivenessAnalysis : public DataflowAnalysis
-BitVector BV = (*this->getStateAt(PP));
+const BitVector &BV = *this->getStateAt(PP);
const BitVector &RegAliases = BC.MIB->getAliases(Reg);
-BV &= RegAliases;
-return BV.any();
+return BV.anyCommon(RegAliases);
}
void run() { Parent::run(); }
diff --git a/bolt/include/bolt/Passes/ReachingDefOrUse.h
b/bolt/include/bolt/Passes/ReachingDefOrUse.h
index 585d673e3b84e..41a6091aad4cb 100644
--- a/bolt/include/bolt/Passes/ReachingDefOrUse.h
+++ b/bolt/include/bolt/Passes/ReachingDefOrUse.h
@@ -133,8 +133,7 @@ class ReachingDefOrUse
RA.getInstClobberList(Point, Regs);
else
RA.getInstUsedRegsList(Point, Regs, false);
-Regs &= this->BC.MIB->getAliases(*TrackingReg);
-if (Regs.any())
+if (Regs.anyCommon(this->BC.MIB->getAliases(*TrackingReg)))
Next.set(this->ExprToIdx[&Point]);
}
}
diff --git a/bolt/lib/Passes/RegReAssign.cpp b/bolt/lib/Passes/RegReAssign.cpp
index 0859cd244ce40..54eff51bfee68 100644
--- a/bolt/lib/Passes/RegReAssign.cpp
+++ b/bolt/lib/Passes/RegReAssign.cpp
@@ -316,18 +316,14 @@ void
RegReAssign::aggressivePassOverFunction(BinaryFunction &Function) {
break;
}
-BitVector AnyAliasAlive = AliveAtStart;
-AnyAliasAlive &= BC.MIB->getAliases(ClassicReg);
-if (AnyAliasAlive.any()) {
+if (AliveAtStart.anyCommon(BC.MIB->getAliases(ClassicReg))) {
LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
<< " with " << BC.MRI->getName(ExtReg)
<< " because classic reg is alive\n");
--End;
continue;
}
-AnyAliasAlive = AliveAtStart;
-AnyAliasAlive &= BC.MIB->getAliases(ExtReg);
-if (AnyAliasAlive.any()) {
+if (AliveAtStart.anyCommon(BC.MIB->getAliases(ExtReg))) {
LLVM_DEBUG(dbgs() << " Bailed on " << BC.MRI->getName(ClassicReg)
<< " with " << BC.MRI->getName(ExtReg)
<< " because extended reg is alive\n");
diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp
b/bolt/lib/Passes/ShrinkWrapping.cpp
index fe342ccd38a67..b882e2512866d 100644
--- a/bolt/lib/Passes/ShrinkWrapping.cpp
+++ b/bolt/lib/Passes/ShrinkWrapping.cpp
@@ -1100,9 +1100,8 @@ SmallVector
ShrinkWrapping::fixPopsPlacements(
bool Found = false;
if (SPT.getStateAt(ProgramPoint::getLastPointAt(*BB))->first ==
SaveOffset) {
- BitVector BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
- BV &= UsesByReg[CSR];
- if (!BV.any()) {
+ const BitVector &BV = *RI.getStateAt(ProgramPoint::getLastPointAt(*BB));
+ if (!BV.anyCommon(UsesByReg[CSR])) {
Found = true;
PP = BB;
continue;
@@ -1110,9 +1109,8 @@ SmallVector
ShrinkWrapping::fixPopsPlacements(
}
for (MCInst &Inst : llvm::reverse(*BB)) {
if (SPT.getStateBefore(Inst)->first == SaveOffset) {
-BitVector BV = *RI.getStateAt(Inst);
-BV &= UsesByReg[CSR];
-if (!BV.any()) {
+const BitVector &BV = *RI.getStateAt(Inst);
+if (!BV.anyCommon(UsesByReg[CSR])) {
Found = true;
PP = &Inst;
break;
diff --git a/bolt/lib/Passes/StackAvailableExpressions.cpp
b/bolt/lib/Passes/StackAvailableExpressions.cpp
index a0d361f273de2..c685cc19badc3 100644
--- a/bolt/lib/Passes/StackAvailableExpressions.cpp
+++ b/bolt/lib/Passes/StackAvailableExpressions.cpp
@@ -103,8 +103,7 @@ bool StackAvailableExpressions::doesXKillsY(const MCInst
*X
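As a side-by-side illustration of the two patterns this cleanup replaces, here is a hand-written sketch that should compile against LLVM's ADT headers (function names are invented for illustration; `subsetOf` is assumed available per this patch series):
```cpp
#include "llvm/ADT/BitVector.h"

using llvm::BitVector;

// Old overlap test: materialize a temporary just to AND and inspect it.
static bool overlapsOld(const BitVector &LHS, const BitVector &RHS) {
  BitVector Tmp = LHS;
  Tmp &= RHS;
  return Tmp.any();
}

// New overlap test: no temporary; can stop at the first shared word.
static bool overlapsNew(const BitVector &LHS, const BitVector &RHS) {
  return LHS.anyCommon(RHS);
}

// Old subset test: clear RHS's bits out of a copy, check emptiness.
static bool subsetOld(const BitVector &LHS, const BitVector &RHS) {
  BitVector Tmp = LHS;
  Tmp.reset(RHS);
  return Tmp.none();
}

// New subset test.
static bool subsetNew(const BitVector &LHS, const BitVector &RHS) {
  return LHS.subsetOf(RHS);
}

int main() {
  BitVector A(8), B(8);
  A.set(1);
  B.set(1);
  B.set(3);
  // A overlaps B and A is a subset of B; old and new forms agree.
  return (overlapsOld(A, B) == overlapsNew(A, B) &&
          subsetOld(A, B) == subsetNew(A, B))
             ? 0
             : 1;
}
```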
[llvm-branch-commits] [flang] [flang][OpenMP] Generalize checks of loop construct structure (PR #170735)
https://github.com/kparzysz updated
https://github.com/llvm/llvm-project/pull/170735
>From 9a2d3dca08ab237e7e949fd5642c96cf0fba89b8 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Tue, 2 Dec 2025 14:59:34 -0600
Subject: [PATCH 1/3] [flang][OpenMP] Generalize checks of loop construct
structure
For an OpenMP loop construct, count how many loops will effectively be
contained in its associated block. For constructs that are loop-nest
associated this number should be 1. Report cases where this number is
different.
Take into account that the block associated with a loop construct can
contain compiler directives.
---
flang/lib/Semantics/check-omp-loop.cpp| 201 +++---
flang/lib/Semantics/check-omp-structure.h | 3 +-
flang/test/Parser/OpenMP/tile-fail.f90| 8 +-
flang/test/Semantics/OpenMP/do21.f90 | 10 +-
.../Semantics/OpenMP/loop-association.f90 | 6 +-
.../OpenMP/loop-transformation-clauses01.f90 | 16 +-
.../loop-transformation-construct01.f90 | 4 +-
.../loop-transformation-construct02.f90 | 8 +-
.../loop-transformation-construct04.f90 | 4 +-
9 files changed, 156 insertions(+), 104 deletions(-)
diff --git a/flang/lib/Semantics/check-omp-loop.cpp
b/flang/lib/Semantics/check-omp-loop.cpp
index fc4b9222d91b3..6414f0028e008 100644
--- a/flang/lib/Semantics/check-omp-loop.cpp
+++ b/flang/lib/Semantics/check-omp-loop.cpp
@@ -37,6 +37,14 @@
#include
#include
+namespace Fortran::semantics {
+static bool IsLoopTransforming(llvm::omp::Directive dir);
+static bool IsFullUnroll(const parser::OpenMPLoopConstruct &x);
+static std::optional<int64_t> CountGeneratedLoops(
+const parser::ExecutionPartConstruct &epc);
+static std::optional<int64_t> CountGeneratedLoops(const parser::Block &block);
+} // namespace Fortran::semantics
+
namespace {
using namespace Fortran;
@@ -263,22 +271,19 @@ static bool IsLoopTransforming(llvm::omp::Directive dir) {
}
void OmpStructureChecker::CheckNestedBlock(const parser::OpenMPLoopConstruct
&x,
-const parser::Block &body, size_t &nestedCount) {
+const parser::Block &body) {
for (auto &stmt : body) {
if (auto *dir{parser::Unwrap(stmt)}) {
context_.Say(dir->source,
"Compiler directives are not allowed inside OpenMP loop
constructs"_warn_en_US);
-} else if (parser::Unwrap(stmt)) {
- ++nestedCount;
} else if (auto *omp{parser::Unwrap(stmt)}) {
if (!IsLoopTransforming(omp->BeginDir().DirName().v)) {
context_.Say(omp->source,
"Only loop-transforming OpenMP constructs are allowed inside
OpenMP loop constructs"_err_en_US);
}
- ++nestedCount;
} else if (auto *block{parser::Unwrap(stmt)}) {
- CheckNestedBlock(x, std::get(block->t), nestedCount);
-} else {
+ CheckNestedBlock(x, std::get(block->t));
+} else if (!parser::Unwrap(stmt)) {
parser::CharBlock source{parser::GetSource(stmt).value_or(x.source)};
context_.Say(source,
"OpenMP loop construct can only contain DO loops or
loop-nest-generating OpenMP constructs"_err_en_US);
@@ -286,16 +291,96 @@ void OmpStructureChecker::CheckNestedBlock(const
parser::OpenMPLoopConstruct &x,
}
}
+static bool IsFullUnroll(const parser::OpenMPLoopConstruct &x) {
+ const parser::OmpDirectiveSpecification &beginSpec{x.BeginDir()};
+
+ if (beginSpec.DirName().v == llvm::omp::Directive::OMPD_unroll) {
+return llvm::none_of(beginSpec.Clauses().v, [](const parser::OmpClause &c)
{
+ return c.Id() == llvm::omp::Clause::OMPC_partial;
+});
+ }
+ return false;
+}
+
+static std::optional<int64_t> CountGeneratedLoops(
+const parser::ExecutionPartConstruct &epc) {
+ if (parser::Unwrap(epc)) {
+return 1;
+ }
+
+ auto &omp{DEREF(parser::Unwrap(epc))};
+ const parser::OmpDirectiveSpecification &beginSpec{omp.BeginDir()};
+ llvm::omp::Directive dir{beginSpec.DirName().v};
+
+ // TODO: Handle split, apply.
+ if (IsFullUnroll(omp)) {
+return std::nullopt;
+ }
+ if (dir == llvm::omp::Directive::OMPD_fuse) {
+auto rangeAt{
+llvm::find_if(beginSpec.Clauses().v, [](const parser::OmpClause &c) {
+ return c.Id() == llvm::omp::Clause::OMPC_looprange;
+})};
+if (rangeAt == beginSpec.Clauses().v.end()) {
+ return std::nullopt;
+}
+
+auto *loopRange{parser::Unwrap(*rangeAt)};
+std::optional<int64_t> count{GetIntValue(std::get<1>(loopRange->t))};
+if (!count || *count <= 0) {
+ return std::nullopt;
+}
+if (auto nestedCount{CountGeneratedLoops(std::get<parser::Block>(omp.t))}) {
+ return 1 + *nestedCount - static_cast<int64_t>(*count);
+} else {
+ return std::nullopt;
+}
+ }
+
+ // For every other loop construct return 1.
+ return 1;
+}
+
+static std::optional<int64_t> CountGeneratedLoops(const parser::Block &block) {
+ // Count the number of loops in the associated block. If there are any
+ // malformed construct in there, getting the number may be meaningless.
+ // These issue
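The FUSE/LOOPRANGE arithmetic above (`1 + *nestedCount - count`) can be sanity-checked with a tiny standalone sketch (hand-written, not from the patch): fusing `count` of the loops generated by the body replaces them with a single fused loop.
```cpp
#include <cstdint>
#include <cstdio>
#include <optional>

// Loops remaining after "fuse" with looprange(first, count): the body's
// loops minus the `count` fused ones, plus the single fused loop.
static std::optional<int64_t> loopsAfterFuse(std::optional<int64_t> BodyLoops,
                                             int64_t Count) {
  if (!BodyLoops || Count <= 0)
    return std::nullopt; // malformed input: the count is meaningless
  return 1 + *BodyLoops - Count;
}

int main() {
  if (auto N = loopsAfterFuse(3, 2)) // body generates 3 loops, fuse 2
    std::printf("%lld\n", static_cast<long long>(*N)); // prints: 2
}
```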
[llvm-branch-commits] [FlowSensitive] [StatusOr] [14/N] Support nested StatusOrs (PR #170950)
https://github.com/fmayer edited https://github.com/llvm/llvm-project/pull/170950 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use different name scope for MIMGBaseOpcode (PR #170904)
https://github.com/mbrkusanin updated
https://github.com/llvm/llvm-project/pull/170904
From 882c6ec5c217e4a29963ba735a7068b28d92bf96 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin
Date: Fri, 5 Dec 2025 19:21:27 +0100
Subject: [PATCH 1/2] [AMDGPU] Use different name scope for MIMGBaseOpcode
Define MIMGBaseOpcode values with `enum class` instead of regular
`enum` so they will be in a separate name scope from regular
opcodes. These two groups of opcodes should not be mixed and
keeping them in different scopes will reduce the chance of
introducing bugs.
---
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h | 6 ++---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 2 +-
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 19 ---
llvm/lib/Target/AMDGPU/MIMGInstructions.td| 3 ++-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 23 ++-
.../Target/AMDGPU/SILoadStoreOptimizer.cpp| 4 +++-
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp| 2 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 16 ++---
8 files changed, 41 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 529da8d28a3c1..328ef3e10f5c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -49,8 +49,8 @@ const D16ImageDimIntrinsic
*lookupD16ImageDimIntrinsic(unsigned Intr);
struct ImageDimIntrinsicInfo {
unsigned Intr;
- unsigned BaseOpcode;
- unsigned AtomicNoRetBaseOpcode;
+ MIMGBaseOpcode BaseOpcode;
+ MIMGBaseOpcode AtomicNoRetBaseOpcode;
MIMGDim Dim;
uint8_t NumOffsetArgs;
@@ -85,7 +85,7 @@ struct ImageDimIntrinsicInfo {
const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
const ImageDimIntrinsicInfo *
-getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim);
+getImageDimIntrinsicByBaseOpcode(MIMGBaseOpcode BaseOpcode, unsigned Dim);
} // end AMDGPU namespace
} // End llvm namespace
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 15492144ba615..e3d4a063c7d3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2054,7 +2054,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
- unsigned IntrOpcode = Intr->BaseOpcode;
+ AMDGPU::MIMGBaseOpcode IntrOpcode = Intr->BaseOpcode;
// For image atomic: use no-return opcode if result is unused.
if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index cb1a4ee6d542e..137aa7d3d18e9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -6850,8 +6850,10 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
}
const bool IsAtomicPacked16Bit =
- (BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
- BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
+ (BaseOpcode->BaseOpcode ==
+ AMDGPU::MIMGBaseOpcode::IMAGE_ATOMIC_PK_ADD_F16 ||
+ BaseOpcode->BaseOpcode ==
+ AMDGPU::MIMGBaseOpcode::IMAGE_ATOMIC_PK_ADD_BF16);
// Check for 16 bit addresses and pack if true.
LLT GradTy =
@@ -7476,10 +7478,11 @@ bool
AMDGPULegalizerInfo::legalizeBVHIntersectRayIntrinsic(
const bool UseNSA =
IsGFX12Plus || (ST.hasNSAEncoding() && NumVAddrs <= ST.getNSAMaxSize());
- const unsigned BaseOpcodes[2][2] = {
- {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
- {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
- AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
+ const AMDGPU::MIMGBaseOpcode BaseOpcodes[2][2] = {
+ {AMDGPU::MIMGBaseOpcode::IMAGE_BVH_INTERSECT_RAY,
+ AMDGPU::MIMGBaseOpcode::IMAGE_BVH_INTERSECT_RAY_a16},
+ {AMDGPU::MIMGBaseOpcode::IMAGE_BVH64_INTERSECT_RAY,
+ AMDGPU::MIMGBaseOpcode::IMAGE_BVH64_INTERSECT_RAY_a16}};
int Opcode;
if (UseNSA) {
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
@@ -7622,8 +7625,8 @@ bool
AMDGPULegalizerInfo::legalizeBVHDualOrBVH8IntersectRayIntrinsic(
const unsigned NumVDataDwords = 10;
const unsigned NumVAddrDwords = IsBVH8 ? 11 : 12;
int Opcode = AMDGPU::getMIMGOpcode(
- IsBVH8 ? AMDGPU::IMAGE_BVH8_INTERSECT_RAY
- : AMDGPU::IMAGE_BVH_DUAL_INTERSECT_RAY,
+ IsBVH8 ? AMDGPU::MIMGBaseOpcode::IMAGE_BVH8_INTERSECT_RAY
+ : AMDGPU::MIMGBaseOpcode::IMAGE_BVH_DUAL_INTERSECT_RAY,
AMDGPU::MIMGEncGfx12, NumVDataDwords, NumVAddrDwords);
assert(Opcode != -1);
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 65dce74a1e894..494c3c07ea
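A minimal illustration of the safety argument (hand-written, not from the patch; the opcode names and values are made up): with a plain enum, a regular opcode silently converts where a MIMG base opcode is expected, while `enum class` makes that a compile error.
```cpp
#include <cstdint>

namespace AMDGPU {
// Regular instruction opcodes (hypothetical values).
enum Opcode : unsigned { V_ADD_F32 = 1, IMAGE_SAMPLE_V1 = 2 };

// Scoped enum: no implicit conversion to/from integers or other enums.
enum class MIMGBaseOpcode : uint8_t { IMAGE_SAMPLE = 1 };
} // namespace AMDGPU

// With a plain `unsigned BaseOpcode` parameter, any opcode converts
// silently; with the scoped enum, only a MIMGBaseOpcode is accepted.
static int lookupOld(unsigned BaseOpcode) { return (int)BaseOpcode; }
static int lookupNew(AMDGPU::MIMGBaseOpcode BaseOpcode) {
  return (int)BaseOpcode;
}

int main() {
  lookupOld(AMDGPU::V_ADD_F32); // compiles: a silent opcode mix-up
  lookupNew(AMDGPU::MIMGBaseOpcode::IMAGE_SAMPLE); // OK
  // lookupNew(AMDGPU::V_ADD_F32); // error: no implicit conversion
  return 0;
}
```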
[llvm-branch-commits] [clang] [LifetimeSafety] Add implicit tracking for STL functions (PR #170005)
https://github.com/ymand approved this pull request. https://github.com/llvm/llvm-project/pull/170005 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [LifetimeSafety] Add implicit tracking for STL functions (PR #170005)
@@ -71,6 +71,88 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl
*FD) {
return isNormalAssignmentOperator(FD);
}
+// Decl::isInStdNamespace will return false for iterators in some STL
+// implementations due to them being defined in a namespace outside of the std
+// namespace.
+static bool isInStlNamespace(const Decl *D) {
+ const DeclContext *DC = D->getDeclContext();
+ if (!DC)
+return false;
+ if (const auto *ND = dyn_cast<NamespaceDecl>(DC))
+if (const IdentifierInfo *II = ND->getIdentifier()) {
+ StringRef Name = II->getName();
+ if (Name.size() >= 2 && Name.front() == '_' &&
+ (Name[1] == '_' || isUppercase(Name[1])))
+return true;
+}
+
+ return DC->isStdNamespace();
+}
+
+static bool isPointerLikeType(QualType QT) {
+ return isGslPointerType(QT) || QT->isPointerType() || QT->isNullPtrType();
+}
+
+bool shouldTrackImplicitObjectArg(const CXXMethodDecl *Callee) {
+ if (auto *Conv = dyn_cast_or_null<CXXConversionDecl>(Callee))
+if (isGslPointerType(Conv->getConversionType()) &&
+Callee->getParent()->hasAttr())
+ return true;
+ if (!isInStlNamespace(Callee->getParent()))
+return false;
+ if (!isGslPointerType(Callee->getFunctionObjectParameterType()) &&
+ !isGslOwnerType(Callee->getFunctionObjectParameterType()))
+return false;
+ if (isPointerLikeType(Callee->getReturnType())) {
+if (!Callee->getIdentifier())
+ return false;
+return llvm::StringSwitch<bool>(Callee->getName())
+.Cases({"begin", "rbegin", "cbegin", "crbegin"}, true)
+.Cases({"end", "rend", "cend", "crend"}, true)
+.Cases({"c_str", "data", "get"}, true)
ymand wrote:
why split into three separate `Cases`?
https://github.com/llvm/llvm-project/pull/170005
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
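For context, the namespace heuristic quoted above can be exercised standalone; a hand-written sketch (not from the patch) using plain `std::string` in place of clang's `IdentifierInfo`/`StringRef`:
```cpp
#include <cctype>
#include <cstdio>
#include <string>

// Reserved-identifier check: "_X" (X uppercase) or "__" prefixes mark
// STL implementation namespaces (e.g. __gnu_cxx, _V2) that sit outside
// the std namespace proper.
static bool looksLikeStlImplNamespace(const std::string &Name) {
  return Name.size() >= 2 && Name.front() == '_' &&
         (Name[1] == '_' || std::isupper((unsigned char)Name[1]));
}

int main() {
  std::printf("%d\n", looksLikeStlImplNamespace("__gnu_cxx")); // 1
  std::printf("%d\n", looksLikeStlImplNamespace("_V2"));       // 1
  std::printf("%d\n", looksLikeStlImplNamespace("mylib"));     // 0
}
```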
[llvm-branch-commits] [clang] [LifetimeSafety] Add implicit tracking for STL functions (PR #170005)
@@ -71,6 +71,88 @@ bool implicitObjectParamIsLifetimeBound(const FunctionDecl
*FD) {
return isNormalAssignmentOperator(FD);
}
+// Decl::isInStdNamespace will return false for iterators in some STL
ymand wrote:
nit: maybe preface with "This function is needed because "?
https://github.com/llvm/llvm-project/pull/170005
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use different name scope for MIMGEncoding and MIMGDim (PR #171166)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko Brkušanin (mbrkusanin)
Changes
Use new scoped enums with type set to uint8_t.
---
Patch is 22.40 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/171166.diff
12 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h (+1-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+22-16)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+12-10)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+5-5)
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp (+3-3)
- (modified) llvm/lib/Target/AMDGPU/MIMGInstructions.td (+18-16)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+34-26)
- (modified) llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp (+1-1)
- (modified) llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp (+6-6)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+4-4)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+3-3)
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 328ef3e10f5c9..480bdb4b1d04f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -85,7 +85,7 @@ struct ImageDimIntrinsicInfo {
const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
const ImageDimIntrinsicInfo *
-getImageDimIntrinsicByBaseOpcode(MIMGBaseOpcode BaseOpcode, unsigned Dim);
+getImageDimIntrinsicByBaseOpcode(MIMGBaseOpcode BaseOpcode, MIMGDim Dim);
} // end AMDGPU namespace
} // End llvm namespace
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index e3d4a063c7d3a..6bd8e54eac2ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2196,22 +2196,26 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
int Opcode = -1;
if (IsGFX12Plus) {
-Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
- NumVDataDwords, NumVAddrDwords);
+Opcode =
+AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncoding::MIMGEncGfx12,
+ NumVDataDwords, NumVAddrDwords);
} else if (IsGFX11Plus) {
-Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
- UseNSA ? AMDGPU::MIMGEncGfx11NSA
- : AMDGPU::MIMGEncGfx11Default,
- NumVDataDwords, NumVAddrDwords);
+Opcode = AMDGPU::getMIMGOpcode(
+IntrOpcode,
+UseNSA ? AMDGPU::MIMGEncoding::MIMGEncGfx11NSA
+ : AMDGPU::MIMGEncoding::MIMGEncGfx11Default,
+NumVDataDwords, NumVAddrDwords);
} else if (IsGFX10Plus) {
-Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
- UseNSA ? AMDGPU::MIMGEncGfx10NSA
- : AMDGPU::MIMGEncGfx10Default,
- NumVDataDwords, NumVAddrDwords);
+Opcode = AMDGPU::getMIMGOpcode(
+IntrOpcode,
+UseNSA ? AMDGPU::MIMGEncoding::MIMGEncGfx10NSA
+ : AMDGPU::MIMGEncoding::MIMGEncGfx10Default,
+NumVDataDwords, NumVAddrDwords);
} else {
if (Subtarget->hasGFX90AInsts()) {
- Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
- NumVDataDwords, NumVAddrDwords);
+ Opcode =
+ AMDGPU::getMIMGOpcode(IntrOpcode,
AMDGPU::MIMGEncoding::MIMGEncGfx90a,
+NumVDataDwords, NumVAddrDwords);
if (Opcode == -1) {
LLVM_DEBUG(
dbgs()
@@ -2221,11 +2225,13 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
}
if (Opcode == -1 &&
STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
- NumVDataDwords, NumVAddrDwords);
+ Opcode =
+ AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncoding::MIMGEncGfx8,
+NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
- Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
- NumVDataDwords, NumVAddrDwords);
+ Opcode =
+ AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncoding::MIMGEncGfx6,
+NumVDataDwords, NumVAddrDwords);
}
if (Opcode == -1)
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 137aa7d3d18e9..9f886b8f4c155 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7485,17 +748
