[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
https://github.com/uweigand edited https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: account for BRK when searching for auth oracles (PR #137975)
@@ -1751,6 +1750,25 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { Inst.addOperand(MCOperand::createImm(0)); } + bool isTrap(const MCInst &Inst) const override { +if (Inst.getOpcode() != AArch64::BRK) + return false; +// Only match the immediate values that are likely to indicate this BRK +// instruction is emitted to terminate the program immediately and not to +// be handled by a SIGTRAP handler, for example. +switch (Inst.getOperand(0).getImm()) { +case 0xc470: +case 0xc471: +case 0xc472: +case 0xc473: + // Explicit Pointer Authentication check failed, see + // AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue(). kbeyls wrote: I'm not sure if it's a good idea to only consider pauthabi-specific BRK values in a "generic" AArch64-interface to test whether something is a trap. This "isTrap" function might get used by other analyses too... I wonder if there would be a way to change the interface of `isTrap` to make it appropriately generic so that it could be used without confusion by other analyses too? An example is this commit that makes the pac-ret analysis more accurate, which I guess hasn't been upstreamed yet: https://github.com/llvm/llvm-project/commit/5b3ed529abd6f6025c9012e5930375c5b577e555 https://github.com/llvm/llvm-project/pull/137975 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
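One possible shape for the more generic interface kbeyls is asking about, sketched here purely for illustration: the `TrapKind` enum and `isTrapOfKind` hook are hypothetical names, not part of this PR or of the existing MCPlusBuilder API.

```c++
#include "llvm/MC/MCInst.h"

namespace llvm {
namespace bolt {

// Hypothetical refinement: let each analysis state which traps it cares
// about, so the pauthabi-specific BRK immediates are only matched when the
// caller explicitly asks for program-terminating traps.
enum class TrapKind {
  Any,           // any trap-like instruction (e.g. any BRK immediate)
  Unrecoverable, // traps expected to terminate the program, such as the
                 // BRK #0xc47x pointer-authentication check failures
};

struct TrapQueryInterface {
  virtual ~TrapQueryInterface() = default;
  // A target override would switch on the BRK immediate, as in the diff
  // above, but only when Kind is TrapKind::Unrecoverable.
  virtual bool isTrapOfKind(const MCInst &Inst, TrapKind Kind) const {
    return false; // conservative default for targets without trap knowledge
  }
};

} // namespace bolt
} // namespace llvm
```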
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -0,0 +1,67 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 + +## Expect to find the branch labels and global variable name. +# CHECK: <_start>: +# CHECK-NEXT: ldr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] +# CHECK-NEXT: adr x2, +# CHECK-NEXT: cmp x1, x2 +# CHECK-NEXT: b.eq +# CHECK-NEXT: b +# CHECK-NEXT: : +# CHECK-NEXT: cbz x2, +# CHECK-NEXT: ret + +## Machine code generated with: MaskRay wrote: Consider adopting llvm/utils/update_test_body.py to make this easier to re-create https://llvm.org/docs/TestingGuide.html#elaborated-tests https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] a935d5a - Revert "[lldb] Add count for number of DWO files loaded in statistics (#144424)"
Author: Michael Buch Date: 2025-06-24T11:32:34+01:00 New Revision: a935d5ab75b11f8fc1f1196e3ee4bcd15a9102ce URL: https://github.com/llvm/llvm-project/commit/a935d5ab75b11f8fc1f1196e3ee4bcd15a9102ce DIFF: https://github.com/llvm/llvm-project/commit/a935d5ab75b11f8fc1f1196e3ee4bcd15a9102ce.diff LOG: Revert "[lldb] Add count for number of DWO files loaded in statistics (#144424)" This reverts commit 3095d3a47d624b573d0748ee37f8f201d5702b63. Added: Modified: lldb/include/lldb/Symbol/SymbolFile.h lldb/include/lldb/Target/Statistics.h lldb/packages/Python/lldbsuite/test/builders/builder.py lldb/packages/Python/lldbsuite/test/make/Makefile.rules lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h lldb/source/Target/Statistics.cpp lldb/test/API/commands/statistics/basic/TestStats.py Removed: lldb/test/API/commands/statistics/basic/baz.cpp lldb/test/API/commands/statistics/basic/third.cpp diff --git a/lldb/include/lldb/Symbol/SymbolFile.h b/lldb/include/lldb/Symbol/SymbolFile.h index b0b608d0a5e79..75c7f230ddf3d 100644 --- a/lldb/include/lldb/Symbol/SymbolFile.h +++ b/lldb/include/lldb/Symbol/SymbolFile.h @@ -472,14 +472,6 @@ class SymbolFile : public PluginInterface { return false; }; - /// Get number of loaded/parsed DWO files. This is emitted in "statistics - /// dump" - /// - /// \returns - /// A pair containing (loaded_dwo_count, total_dwo_count). If this - /// symbol file doesn't support DWO files, both counts will be 0. - virtual std::pair GetDwoFileCounts() { return {0, 0}; } - virtual lldb::TypeSP MakeType(lldb::user_id_t uid, ConstString name, std::optional byte_size, SymbolContextScope *context, diff --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h index 42f03798c219e..2d0d25cd3c753 100644 --- a/lldb/include/lldb/Target/Statistics.h +++ b/lldb/include/lldb/Target/Statistics.h @@ -153,8 +153,6 @@ struct ModuleStats { bool symtab_stripped = false; bool debug_info_had_variable_errors = false; bool debug_info_had_incomplete_types = false; - uint32_t dwo_file_count = 0; - uint32_t loaded_dwo_file_count = 0; }; struct ConstStringStats { diff --git a/lldb/packages/Python/lldbsuite/test/builders/builder.py b/lldb/packages/Python/lldbsuite/test/builders/builder.py index efb1ba568e3e6..de05732469448 100644 --- a/lldb/packages/Python/lldbsuite/test/builders/builder.py +++ b/lldb/packages/Python/lldbsuite/test/builders/builder.py @@ -247,25 +247,13 @@ def getLLDBObjRoot(self): def _getDebugInfoArgs(self, debug_info): if debug_info is None: return [] - -debug_options = debug_info if isinstance(debug_info, list) else [debug_info] -option_flags = { -"dwarf": {"MAKE_DSYM": "NO"}, -"dwo": {"MAKE_DSYM": "NO", "MAKE_DWO": "YES"}, -"gmodules": {"MAKE_DSYM": "NO", "MAKE_GMODULES": "YES"}, -"debug_names": {"MAKE_DEBUG_NAMES": "YES"}, -"dwp": {"MAKE_DSYM": "NO", "MAKE_DWP": "YES"}, -} - -# Collect all flags, with later options overriding earlier ones -flags = {} - -for option in debug_options: -if not option or option not in option_flags: -return None # Invalid options -flags.update(option_flags[option]) - -return [f"{key}={value}" for key, value in flags.items()] +if debug_info == "dwarf": +return ["MAKE_DSYM=NO"] +if debug_info == "dwo": +return ["MAKE_DSYM=NO", "MAKE_DWO=YES"] +if debug_info == "gmodules": +return ["MAKE_DSYM=NO", "MAKE_GMODULES=YES"] +return None def getBuildCommand( self, diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules 
b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules index 58833e1b0cc78..06959f226066a 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules @@ -276,10 +276,6 @@ ifeq "$(MAKE_DWO)" "YES" CFLAGS += -gsplit-dwarf endif -ifeq "$(MAKE_DEBUG_NAMES)" "YES" - CFLAGS += -gpubnames -endif - ifeq "$(USE_PRIVATE_MODULE_CACHE)" "YES" THE_CLANG_MODULE_CACHE_DIR := $(BUILDDIR)/private-module-cache else diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index c83779c40a05b..71f204c03a42a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -4420,32 +4420,3 @@ void SymbolFileDWARF::GetCompileOptions( args.insert({comp_unit, Args(flags)}); } } - -std::pair SymbolFileDWARF::GetDwoFileCounts() { - uint32_t total_dwo_count
[llvm-branch-commits] [llvm] [pgo] add means to specify "unknown" MD_prof (PR #145578)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- llvm/include/llvm/IR/ProfDataUtils.h llvm/lib/IR/ProfDataUtils.cpp llvm/lib/IR/Verifier.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index 59a5d99f0..89fa7f735 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -142,7 +142,7 @@ LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef Weights, /// info. LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I); -LLVM_ABI bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode& MD); +LLVM_ABI bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD); LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I); /// Scaling the profile data attached to 'I' using the ratio of S/T. diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 6eb7d4377..1585771c0 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -44,8 +44,7 @@ constexpr unsigned MinBWOps = 3; // the minimum number of operands for MD_prof nodes with value profiles constexpr unsigned MinVPOps = 5; -const char* UnknownBranchWeightsMarker = "unknown"; - +const char *UnknownBranchWeightsMarker = "unknown"; // We may want to add support for other MD_prof types, so provide an abstraction // for checking the metadata type. @@ -250,7 +249,8 @@ bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD) { bool hasExplicitlyUnknownBranchWeights(const Instruction &I) { auto *MD = I.getMetadata(LLVMContext::MD_prof); - if (!MD) return false; + if (!MD) +return false; return isExplicitlyUnknownBranchWeightsMetadata(*MD); } `` https://github.com/llvm/llvm-project/pull/145578 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR][PGO] Verify the structure of `VP` metadata. (PR #145584)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/145584 >From a9742de1b38e1745268e970845d187108ba292bf Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 24 Jun 2025 13:14:09 -0700 Subject: [PATCH] [IR][PGO] Verify the structure of `VP` metadata. --- llvm/lib/IR/Verifier.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ae95e3e2bff8d..4236d9bb9630b 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5008,9 +5008,10 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { Check(mdconst::dyn_extract(MDO), "!prof brunch_weights operand is not a const int"); } + } else if (ProfName == "VP") { + } else { -Check(ProfName == "VP", "expected either branch_weights or VP profile name", - MD); +CheckFailed("expected either branch_weights or VP profile name", MD); } } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
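For context on what the verifier is starting to check here, this is the general shape of value-profile (`"VP"`) metadata on an indirect call; a hand-written sketch in which the target hashes and counts are invented for illustration.

```llvm
; Sketch only: kind 0 is indirect-call target profiling; the operands after
; the total count are (callee hash, count) pairs.
define void @caller(ptr %fp) {
  call void %fp(), !prof !0
  ret void
}

!0 = !{!"VP", i32 0, i64 1600, i64 -6147977994543036039, i64 1300,
       i64 7651369219802541529, i64 300}
```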
[llvm-branch-commits] [llvm] [pgo] add means to specify "unknown" MD_prof (PR #145578)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/145578 >From 6c6ac88c73fbffa56983bf8a0cf269e0bc59cb14 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 24 Jun 2025 09:50:40 -0700 Subject: [PATCH] [pgo] add means to specify "unknown" MD_prof --- llvm/include/llvm/IR/ProfDataUtils.h | 12 + llvm/lib/IR/ProfDataUtils.cpp | 22 llvm/lib/IR/Verifier.cpp | 3 +++ llvm/test/Bitcode/branch-weight-unknown.ll | 30 ++ 4 files changed, 67 insertions(+) create mode 100644 llvm/test/Bitcode/branch-weight-unknown.ll diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index 8e8d069b836f1..89fa7f735f5d4 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -133,6 +133,18 @@ LLVM_ABI bool extractProfTotalWeight(const Instruction &I, LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef Weights, bool IsExpected); +/// Specify that the branch weights for this terminator cannot be known at +/// compile time. This should only be called by passes, and never as a default +/// behavior in e.g. MDBuilder. The goal is to use this info to validate passes +/// do not accidentally drop profile info, and this API is called in cases where +/// the pass explicitly cannot provide that info. Defaulting it in would hide +/// bugs where the pass forgets to transfer over or otherwise specify profile +/// info. +LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I); + +LLVM_ABI bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD); +LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I); + /// Scaling the profile data attached to 'I' using the ratio of S/T. LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T); diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 21524eb840539..1585771c0d0ae 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -44,6 +44,8 @@ constexpr unsigned MinBWOps = 3; // the minimum number of operands for MD_prof nodes with value profiles constexpr unsigned MinVPOps = 5; +const char *UnknownBranchWeightsMarker = "unknown"; + // We may want to add support for other MD_prof types, so provide an abstraction // for checking the metadata type. 
bool isTargetMD(const MDNode *ProfData, const char *Name, unsigned MinOps) { @@ -232,6 +234,26 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) { return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal); } +void setExplicitlyUnknownBranchWeights(Instruction &I) { + MDBuilder MDB(I.getContext()); + I.setMetadata(LLVMContext::MD_prof, +MDNode::get(I.getContext(), +MDB.createString(UnknownBranchWeightsMarker))); +} + +bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD) { + if (MD.getNumOperands() != 1) +return false; + return MD.getOperand(0).equalsStr(UnknownBranchWeightsMarker); +} + +bool hasExplicitlyUnknownBranchWeights(const Instruction &I) { + auto *MD = I.getMetadata(LLVMContext::MD_prof); + if (!MD) +return false; + return isExplicitlyUnknownBranchWeightsMetadata(*MD); +} + void setBranchWeights(Instruction &I, ArrayRef Weights, bool IsExpected) { MDBuilder MDB(I.getContext()); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ae95e3e2bff8d..0ffe4ac257da5 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4964,6 +4964,9 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { } void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { + if (isExplicitlyUnknownBranchWeightsMetadata(*MD)) +return; + Check(MD->getNumOperands() >= 2, "!prof annotations should have no less than 2 operands", MD); diff --git a/llvm/test/Bitcode/branch-weight-unknown.ll b/llvm/test/Bitcode/branch-weight-unknown.ll new file mode 100644 index 0..921be1ff5da97 --- /dev/null +++ b/llvm/test/Bitcode/branch-weight-unknown.ll @@ -0,0 +1,30 @@ +; Test branch weight unknown validation + +; RUN: split-file %s %t +; RUN: opt -passes=verify %t/correct.ll --disable-output +; RUN: not opt -passes=verify %t/incorrect.ll --disable-output +; RUN: not opt -passes=verify %t/on_function.ll --disable-output + +;--- correct.ll +define void @correct(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %no, !prof !0 +yes: + ret void +no: + ret void +} + +!0 = !{!"unknown"} + +;--- incorrect.ll +define void @correct(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %no, !prof !0 +yes: + ret void +no: + ret void +} + +!0 = !{!"unknown", i32 12, i32 67} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cg
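A minimal usage sketch for the API added in the patch above, assuming a hypothetical transform that has just rewritten a conditional branch and cannot recompute its weights; only the ProfDataUtils calls come from the patch, the surrounding helper is invented.

```c++
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ProfDataUtils.h"
#include <cassert>

using namespace llvm;

// Hypothetical pass step: BI's condition was just replaced in a way that
// invalidates any previously attached branch_weights.
static void markWeightsUnknown(BranchInst &BI) {
  if (!BI.isConditional())
    return;
  // Attach !prof !{!"unknown"} instead of silently leaving stale weights in
  // place (or dropping !prof without a trace), so profile-preservation checks
  // can tell "deliberately unknown" apart from "accidentally lost".
  setExplicitlyUnknownBranchWeights(BI);
  assert(hasExplicitlyUnknownBranchWeights(BI));
}
```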
[llvm-branch-commits] [llvm] [pgo] add means to specify "unknown" MD_prof (PR #145578)
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/145578 >From e2f673682594ceafc73e5b7765934caabffd4907 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 24 Jun 2025 09:50:40 -0700 Subject: [PATCH] [pgo] add means to specify "unknown" MD_prof --- llvm/include/llvm/IR/ProfDataUtils.h | 12 +++ llvm/lib/IR/ProfDataUtils.cpp| 22 + llvm/lib/IR/Verifier.cpp | 46 +++--- llvm/test/Verifier/branch-weight.ll | 130 +-- 4 files changed, 187 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index 8e8d069b836f1..89fa7f735f5d4 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -133,6 +133,18 @@ LLVM_ABI bool extractProfTotalWeight(const Instruction &I, LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef Weights, bool IsExpected); +/// Specify that the branch weights for this terminator cannot be known at +/// compile time. This should only be called by passes, and never as a default +/// behavior in e.g. MDBuilder. The goal is to use this info to validate passes +/// do not accidentally drop profile info, and this API is called in cases where +/// the pass explicitly cannot provide that info. Defaulting it in would hide +/// bugs where the pass forgets to transfer over or otherwise specify profile +/// info. +LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I); + +LLVM_ABI bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD); +LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I); + /// Scaling the profile data attached to 'I' using the ratio of S/T. LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T); diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 21524eb840539..1585771c0d0ae 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -44,6 +44,8 @@ constexpr unsigned MinBWOps = 3; // the minimum number of operands for MD_prof nodes with value profiles constexpr unsigned MinVPOps = 5; +const char *UnknownBranchWeightsMarker = "unknown"; + // We may want to add support for other MD_prof types, so provide an abstraction // for checking the metadata type. 
bool isTargetMD(const MDNode *ProfData, const char *Name, unsigned MinOps) { @@ -232,6 +234,26 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) { return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal); } +void setExplicitlyUnknownBranchWeights(Instruction &I) { + MDBuilder MDB(I.getContext()); + I.setMetadata(LLVMContext::MD_prof, +MDNode::get(I.getContext(), +MDB.createString(UnknownBranchWeightsMarker))); +} + +bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD) { + if (MD.getNumOperands() != 1) +return false; + return MD.getOperand(0).equalsStr(UnknownBranchWeightsMarker); +} + +bool hasExplicitlyUnknownBranchWeights(const Instruction &I) { + auto *MD = I.getMetadata(LLVMContext::MD_prof); + if (!MD) +return false; + return isExplicitlyUnknownBranchWeightsMetadata(*MD); +} + void setBranchWeights(Instruction &I, ArrayRef Weights, bool IsExpected) { MDBuilder MDB(I.getContext()); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ae95e3e2bff8d..98fc31f75a031 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2508,6 +2508,12 @@ void Verifier::verifyFunctionMetadata( for (const auto &Pair : MDs) { if (Pair.first == LLVMContext::MD_prof) { MDNode *MD = Pair.second; + if (isExplicitlyUnknownBranchWeightsMetadata(*MD)) { +CheckFailed("'unknown' !prof metadata should appear only on " +"instructions supporting the 'branch_weights' metadata", +MD); +continue; + } Check(MD->getNumOperands() >= 2, "!prof annotations should have no less than 2 operands", MD); @@ -4964,6 +4970,30 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { } void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { + auto GetBranchingTerminatorNumOperands = [&]() { +unsigned ExpectedNumOperands = 0; +if (BranchInst *BI = dyn_cast(&I)) + ExpectedNumOperands = BI->getNumSuccessors(); +else if (SwitchInst *SI = dyn_cast(&I)) + ExpectedNumOperands = SI->getNumSuccessors(); +else if (isa(&I)) + ExpectedNumOperands = 1; +else if (IndirectBrInst *IBI = dyn_cast(&I)) + ExpectedNumOperands = IBI->getNumDestinations(); +else if (isa(&I)) + ExpectedNumOperands = 2; +else if (CallBrInst *CI = dyn_cast(&I)) + ExpectedNumOperands = CI->getNumSuccessors(); +return ExpectedNumOperands; + }; + if (isExplicitlyUnknownBranchWeightsMetadata(*MD)) { +Check(GetBran
[llvm-branch-commits] [compiler-rt] Rsan (PR #145540)
https://github.com/rymrg edited https://github.com/llvm/llvm-project/pull/145540 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [pgo] add means to specify "unknown" MD_prof (PR #145578)
https://github.com/mtrofin created https://github.com/llvm/llvm-project/pull/145578 None >From 1a15250e8b8cbdbb17613641fbd6003b4a901606 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 24 Jun 2025 09:50:40 -0700 Subject: [PATCH] [pgo] add means to specify "unknown" MD_prof --- llvm/include/llvm/IR/ProfDataUtils.h | 12 + llvm/lib/IR/ProfDataUtils.cpp | 22 llvm/lib/IR/Verifier.cpp | 3 +++ llvm/test/Bitcode/branch-weight-unknown.ll | 30 ++ 4 files changed, 67 insertions(+) create mode 100644 llvm/test/Bitcode/branch-weight-unknown.ll diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index 8e8d069b836f1..59a5d99f09512 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -133,6 +133,18 @@ LLVM_ABI bool extractProfTotalWeight(const Instruction &I, LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef Weights, bool IsExpected); +/// Specify that the branch weights for this terminator cannot be known at +/// compile time. This should only be called by passes, and never as a default +/// behavior in e.g. MDBuilder. The goal is to use this info to validate passes +/// do not accidentally drop profile info, and this API is called in cases where +/// the pass explicitly cannot provide that info. Defaulting it in would hide +/// bugs where the pass forgets to transfer over or otherwise specify profile +/// info. +LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I); + +LLVM_ABI bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode& MD); +LLVM_ABI bool hasExplicitlyUnknownBranchWeights(const Instruction &I); + /// Scaling the profile data attached to 'I' using the ratio of S/T. LLVM_ABI void scaleProfData(Instruction &I, uint64_t S, uint64_t T); diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 21524eb840539..6eb7d437764aa 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -44,6 +44,9 @@ constexpr unsigned MinBWOps = 3; // the minimum number of operands for MD_prof nodes with value profiles constexpr unsigned MinVPOps = 5; +const char* UnknownBranchWeightsMarker = "unknown"; + + // We may want to add support for other MD_prof types, so provide an abstraction // for checking the metadata type. 
bool isTargetMD(const MDNode *ProfData, const char *Name, unsigned MinOps) { @@ -232,6 +235,25 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalVal) { return extractProfTotalWeight(I.getMetadata(LLVMContext::MD_prof), TotalVal); } +void setExplicitlyUnknownBranchWeights(Instruction &I) { + MDBuilder MDB(I.getContext()); + I.setMetadata(LLVMContext::MD_prof, +MDNode::get(I.getContext(), +MDB.createString(UnknownBranchWeightsMarker))); +} + +bool isExplicitlyUnknownBranchWeightsMetadata(const MDNode &MD) { + if (MD.getNumOperands() != 1) +return false; + return MD.getOperand(0).equalsStr(UnknownBranchWeightsMarker); +} + +bool hasExplicitlyUnknownBranchWeights(const Instruction &I) { + auto *MD = I.getMetadata(LLVMContext::MD_prof); + if (!MD) return false; + return isExplicitlyUnknownBranchWeightsMetadata(*MD); +} + void setBranchWeights(Instruction &I, ArrayRef Weights, bool IsExpected) { MDBuilder MDB(I.getContext()); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ae95e3e2bff8d..0ffe4ac257da5 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4964,6 +4964,9 @@ void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { } void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { + if (isExplicitlyUnknownBranchWeightsMetadata(*MD)) +return; + Check(MD->getNumOperands() >= 2, "!prof annotations should have no less than 2 operands", MD); diff --git a/llvm/test/Bitcode/branch-weight-unknown.ll b/llvm/test/Bitcode/branch-weight-unknown.ll new file mode 100644 index 0..921be1ff5da97 --- /dev/null +++ b/llvm/test/Bitcode/branch-weight-unknown.ll @@ -0,0 +1,30 @@ +; Test branch weight unknown validation + +; RUN: split-file %s %t +; RUN: opt -passes=verify %t/correct.ll --disable-output +; RUN: not opt -passes=verify %t/incorrect.ll --disable-output +; RUN: not opt -passes=verify %t/on_function.ll --disable-output + +;--- correct.ll +define void @correct(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %no, !prof !0 +yes: + ret void +no: + ret void +} + +!0 = !{!"unknown"} + +;--- incorrect.ll +define void @correct(i32 %a) { + %c = icmp eq i32 %a, 0 + br i1 %c, label %yes, label %no, !prof !0 +yes: + ret void +no: + ret void +} + +!0 = !{!"unknown", i32 12, i32 67} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org
[llvm-branch-commits] [llvm] [pgo] add means to specify "unknown" MD_prof (PR #145578)
mtrofin wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite. Learn more: https://graphite.dev/docs/merge-pull-requests
* **#145578** 👈 (this PR; view in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/145578)
* **#145576** (https://app.graphite.dev/github/pr/llvm/llvm-project/145576)
* `main`
This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/
https://github.com/llvm/llvm-project/pull/145578 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [pgo] add means to specify "unknown" MD_prof (PR #145578)
@@ -0,0 +1,30 @@ +; Test branch weight unknown validation + +; RUN: split-file %s %t +; RUN: opt -passes=verify %t/correct.ll --disable-output +; RUN: not opt -passes=verify %t/incorrect.ll --disable-output mtrofin wrote: yes, and I'll move these to llvm/test/Verifier/branch-weights.ll from the last patch https://github.com/llvm/llvm-project/pull/145578 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Add KnownBits simplification combines to RegBankCombiner (PR #141591)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/141591 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Gadget scanner: detect untrusted LR before tail call (PR #137224)
https://github.com/kbeyls commented: Thanks, mostly looks good, I only have 1 nitpicky comment about the underlying reason why the pauth analyzer should have a slightly different "definition" of what is considered a tail call versus BOLT overall. https://github.com/llvm/llvm-project/pull/137224 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -0,0 +1,67 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 + +## Expect to find the branch labels and global variable name. +# CHECK: <_start>: +# CHECK-NEXT: ldr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] +# CHECK-NEXT: adr x2, +# CHECK-NEXT: cmp x1, x2 +# CHECK-NEXT: b.eq +# CHECK-NEXT: b +# CHECK-NEXT: : +# CHECK-NEXT: cbz x2, +# CHECK-NEXT: ret + +## Machine code generated with: aengelke wrote: obj2yaml produces loads of unnecessary content (program headers, dynamic sections (dynsym/dynstr/hash/dynamic), its output is twice as long as this test currently is. I can do that, but I don't think it's worth the effort. https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] RSan (PR #145540)
https://github.com/rymrg edited https://github.com/llvm/llvm-project/pull/145540 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] RSan (PR #145540)
https://github.com/rymrg updated https://github.com/llvm/llvm-project/pull/145540 >From 89b3a5541fadc69b721d584a95d695e809eb1f78 Mon Sep 17 00:00:00 2001 From: rymrg <54061433+ry...@users.noreply.github.com> Date: Mon, 23 Jun 2025 21:55:32 +0300 Subject: [PATCH 1/2] RSan: https://doi.org/10.1145/3729277 Without value support --- compiler-rt/lib/tsan/rtl/CMakeLists.txt | 7 + compiler-rt/lib/tsan/rtl/rsan.cpp | 8 + compiler-rt/lib/tsan/rtl/rsan_action.hpp | 97 +++ compiler-rt/lib/tsan/rtl/rsan_arena.hpp | 45 ++ compiler-rt/lib/tsan/rtl/rsan_defs.hpp| 90 +++ compiler-rt/lib/tsan/rtl/rsan_dense_map.h | 714 ++ compiler-rt/lib/tsan/rtl/rsan_instrument.hpp | 358 + compiler-rt/lib/tsan/rtl/rsan_lock.hpp| 33 + compiler-rt/lib/tsan/rtl/rsan_map.hpp | 88 +++ compiler-rt/lib/tsan/rtl/rsan_memoryorder.hpp | 65 ++ compiler-rt/lib/tsan/rtl/rsan_report.cpp | 72 ++ compiler-rt/lib/tsan/rtl/rsan_report.hpp | 85 +++ .../lib/tsan/rtl/rsan_robustnessmodel.hpp | 286 +++ compiler-rt/lib/tsan/rtl/rsan_stacktrace.cpp | 134 compiler-rt/lib/tsan/rtl/rsan_stacktrace.hpp | 92 +++ compiler-rt/lib/tsan/rtl/rsan_vector.h| 178 + compiler-rt/lib/tsan/rtl/rsan_vectorclock.hpp | 115 +++ compiler-rt/lib/tsan/rtl/tsan_flags.inc | 3 + .../lib/tsan/rtl/tsan_interface_atomic.cpp| 107 ++- compiler-rt/lib/tsan/rtl/tsan_mman.cpp| 8 + compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp | 11 + 21 files changed, 2569 insertions(+), 27 deletions(-) create mode 100644 compiler-rt/lib/tsan/rtl/rsan.cpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_action.hpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_arena.hpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_defs.hpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_dense_map.h create mode 100644 compiler-rt/lib/tsan/rtl/rsan_instrument.hpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_lock.hpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_map.hpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_memoryorder.hpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_report.cpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_report.hpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_robustnessmodel.hpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_stacktrace.cpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_stacktrace.hpp create mode 100644 compiler-rt/lib/tsan/rtl/rsan_vector.h create mode 100644 compiler-rt/lib/tsan/rtl/rsan_vectorclock.hpp diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt index d7d84706bfd58..eb5f4a84fa359 100644 --- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt +++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt @@ -49,6 +49,9 @@ set(TSAN_SOURCES tsan_symbolize.cpp tsan_sync.cpp tsan_vector_clock.cpp + rsan.cpp + rsan_report.cpp + rsan_stacktrace.cpp ) set(TSAN_CXX_SOURCES @@ -59,6 +62,10 @@ set(TSAN_PREINIT_SOURCES tsan_preinit.cpp ) +set_source_files_properties(tsan_interface_atomic.cpp PROPERTIES COMPILE_FLAGS -std=c++20) +set_source_files_properties(tsan_mman.cpp PROPERTIES COMPILE_FLAGS -std=c++20) +set_source_files_properties(tsan_rtl_mutex.cpp PROPERTIES COMPILE_FLAGS -std=c++20) + if(APPLE) list(APPEND TSAN_SOURCES tsan_interceptors_mac.cpp diff --git a/compiler-rt/lib/tsan/rtl/rsan.cpp b/compiler-rt/lib/tsan/rtl/rsan.cpp new file mode 100644 index 0..fb696eb277b98 --- /dev/null +++ b/compiler-rt/lib/tsan/rtl/rsan.cpp @@ -0,0 +1,8 @@ +#include "rsan_vectorclock.hpp" +#include "rsan_robustnessmodel.hpp" +#include "rsan_instrument.hpp" +#include "rsan_map.hpp" +#include "rsan_arena.hpp" + +namespace Robustness{ +} // 
namespace Robustness diff --git a/compiler-rt/lib/tsan/rtl/rsan_action.hpp b/compiler-rt/lib/tsan/rtl/rsan_action.hpp new file mode 100644 index 0..a066b4e6ea8fc --- /dev/null +++ b/compiler-rt/lib/tsan/rtl/rsan_action.hpp @@ -0,0 +1,97 @@ +#pragma once +#include "rsan_defs.hpp" +namespace Robustness::Action{ + struct StoreAction{ + ThreadId tid; + Address addr; + int size; + }; + struct LoadAction{ + ThreadId tid; + Address addr; + int size; + }; + struct AtomicVerifyAction{ + ThreadId tid; + Address addr; + morder mo; + int size; + }; + struct AtomicVerifyStoreAction{ + ThreadId tid; + Address addr; + morder mo; + int size; + }; + struct AtomicLoadAction{ + ThreadId tid; + Address addr; + morder mo; + int size; + bool rmw; + DebugInfo dbg; + }; + struct AtomicStoreAction{ + ThreadId tid; + Address
[llvm-branch-commits] [compiler-rt] RSan (PR #145540)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: None (rymrg) Changes Initial RSan without value extension https://discourse.llvm.org/t/rfc-robustess-sanitizer/86831/ Paper: https://doi.org/10.1145/3729277 Preprint version: https://arxiv.org/pdf/2504.15036 Proper race detection depends on https://github.com/llvm/llvm-project/pull/142579 --- Patch is 85.83 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145540.diff 21 Files Affected: - (modified) compiler-rt/lib/tsan/rtl/CMakeLists.txt (+7) - (added) compiler-rt/lib/tsan/rtl/rsan.cpp (+8) - (added) compiler-rt/lib/tsan/rtl/rsan_action.hpp (+97) - (added) compiler-rt/lib/tsan/rtl/rsan_arena.hpp (+45) - (added) compiler-rt/lib/tsan/rtl/rsan_defs.hpp (+96) - (added) compiler-rt/lib/tsan/rtl/rsan_dense_map.h (+714) - (added) compiler-rt/lib/tsan/rtl/rsan_instrument.hpp (+358) - (added) compiler-rt/lib/tsan/rtl/rsan_lock.hpp (+33) - (added) compiler-rt/lib/tsan/rtl/rsan_map.hpp (+88) - (added) compiler-rt/lib/tsan/rtl/rsan_memoryorder.hpp (+65) - (added) compiler-rt/lib/tsan/rtl/rsan_report.cpp (+72) - (added) compiler-rt/lib/tsan/rtl/rsan_report.hpp (+85) - (added) compiler-rt/lib/tsan/rtl/rsan_robustnessmodel.hpp (+280) - (added) compiler-rt/lib/tsan/rtl/rsan_stacktrace.cpp (+134) - (added) compiler-rt/lib/tsan/rtl/rsan_stacktrace.hpp (+92) - (added) compiler-rt/lib/tsan/rtl/rsan_vector.h (+178) - (added) compiler-rt/lib/tsan/rtl/rsan_vectorclock.hpp (+115) - (modified) compiler-rt/lib/tsan/rtl/tsan_flags.inc (+3) - (modified) compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp (+80-27) - (modified) compiler-rt/lib/tsan/rtl/tsan_mman.cpp (+8) - (modified) compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp (+11) ``diff diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt index d7d84706bfd58..eb5f4a84fa359 100644 --- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt +++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt @@ -49,6 +49,9 @@ set(TSAN_SOURCES tsan_symbolize.cpp tsan_sync.cpp tsan_vector_clock.cpp + rsan.cpp + rsan_report.cpp + rsan_stacktrace.cpp ) set(TSAN_CXX_SOURCES @@ -59,6 +62,10 @@ set(TSAN_PREINIT_SOURCES tsan_preinit.cpp ) +set_source_files_properties(tsan_interface_atomic.cpp PROPERTIES COMPILE_FLAGS -std=c++20) +set_source_files_properties(tsan_mman.cpp PROPERTIES COMPILE_FLAGS -std=c++20) +set_source_files_properties(tsan_rtl_mutex.cpp PROPERTIES COMPILE_FLAGS -std=c++20) + if(APPLE) list(APPEND TSAN_SOURCES tsan_interceptors_mac.cpp diff --git a/compiler-rt/lib/tsan/rtl/rsan.cpp b/compiler-rt/lib/tsan/rtl/rsan.cpp new file mode 100644 index 0..fb696eb277b98 --- /dev/null +++ b/compiler-rt/lib/tsan/rtl/rsan.cpp @@ -0,0 +1,8 @@ +#include "rsan_vectorclock.hpp" +#include "rsan_robustnessmodel.hpp" +#include "rsan_instrument.hpp" +#include "rsan_map.hpp" +#include "rsan_arena.hpp" + +namespace Robustness{ +} // namespace Robustness diff --git a/compiler-rt/lib/tsan/rtl/rsan_action.hpp b/compiler-rt/lib/tsan/rtl/rsan_action.hpp new file mode 100644 index 0..a066b4e6ea8fc --- /dev/null +++ b/compiler-rt/lib/tsan/rtl/rsan_action.hpp @@ -0,0 +1,97 @@ +#pragma once +#include "rsan_defs.hpp" +namespace Robustness::Action{ + struct StoreAction{ + ThreadId tid; + Address addr; + int size; + }; + struct LoadAction{ + ThreadId tid; + Address addr; + int size; + }; + struct AtomicVerifyAction{ + ThreadId tid; + Address addr; + morder mo; + int size; + }; + struct AtomicVerifyStoreAction{ + ThreadId tid; + Address addr; + morder mo; + int size; + }; + struct 
AtomicLoadAction{ + ThreadId tid; + Address addr; + morder mo; + int size; + bool rmw; + DebugInfo dbg; + }; + struct AtomicStoreAction{ + ThreadId tid; + Address addr; + morder mo; + int size; + uint64_t oldValue; + uint64_t newValue; + DebugInfo dbg; + }; + struct AtomicRMWAction{ + ThreadId tid; + Address addr; + morder mo; + int size; + uint64_t oldValue; + uint64_t newValue; + DebugInfo dbg; + }; + struct AtomicCasAction{ + ThreadId tid; + Address addr; + morder mo; + int size; + uint64_t oldValue; + uint64_t newValue; + bool success; + DebugInfo dbg; + }; + struct FenceAction{ + ThreadId tid; + morder m
[llvm-branch-commits] [mlir] c0aa1f0 - Revert "[mlir] Improve mlir-query by adding matcher combinators (#141423)"
Author: Qinkun Bao Date: 2025-06-24T11:37:27-04:00 New Revision: c0aa1f007ad7d13e7e8c7949f4d7271f870c7f58 URL: https://github.com/llvm/llvm-project/commit/c0aa1f007ad7d13e7e8c7949f4d7271f870c7f58 DIFF: https://github.com/llvm/llvm-project/commit/c0aa1f007ad7d13e7e8c7949f4d7271f870c7f58.diff LOG: Revert "[mlir] Improve mlir-query by adding matcher combinators (#141423)" This reverts commit 12611a7fc71376e88aa01e3f0bbc74517f1a1703. Added: mlir/test/mlir-query/complex-test.mlir Modified: mlir/include/mlir/Query/Matcher/Marshallers.h mlir/include/mlir/Query/Matcher/MatchFinder.h mlir/include/mlir/Query/Matcher/MatchersInternal.h mlir/include/mlir/Query/Matcher/SliceMatchers.h mlir/include/mlir/Query/Matcher/VariantValue.h mlir/lib/Query/Matcher/CMakeLists.txt mlir/lib/Query/Matcher/RegistryManager.cpp mlir/lib/Query/Matcher/VariantValue.cpp mlir/lib/Query/Query.cpp mlir/tools/mlir-query/mlir-query.cpp Removed: mlir/lib/Query/Matcher/MatchersInternal.cpp mlir/test/mlir-query/backward-slice-union.mlir mlir/test/mlir-query/forward-slice-by-predicate.mlir mlir/test/mlir-query/logical-operator-test.mlir mlir/test/mlir-query/slice-function-extraction.mlir diff --git a/mlir/include/mlir/Query/Matcher/Marshallers.h b/mlir/include/mlir/Query/Matcher/Marshallers.h index 5fe6965f32efb..012bf7b9ec4a9 100644 --- a/mlir/include/mlir/Query/Matcher/Marshallers.h +++ b/mlir/include/mlir/Query/Matcher/Marshallers.h @@ -108,9 +108,6 @@ class MatcherDescriptor { const llvm::ArrayRef args, Diagnostics *error) const = 0; - // If the matcher is variadic, it can take any number of arguments. - virtual bool isVariadic() const = 0; - // Returns the number of arguments accepted by the matcher. virtual unsigned getNumArgs() const = 0; @@ -143,8 +140,6 @@ class FixedArgCountMatcherDescriptor : public MatcherDescriptor { return marshaller(matcherFunc, matcherName, nameRange, args, error); } - bool isVariadic() const override { return false; } - unsigned getNumArgs() const override { return argKinds.size(); } void getArgKinds(unsigned argNo, std::vector &kinds) const override { @@ -158,54 +153,6 @@ class FixedArgCountMatcherDescriptor : public MatcherDescriptor { const std::vector argKinds; }; -class VariadicOperatorMatcherDescriptor : public MatcherDescriptor { -public: - using VarOp = DynMatcher::VariadicOperator; - VariadicOperatorMatcherDescriptor(unsigned minCount, unsigned maxCount, -VarOp varOp, StringRef matcherName) - : minCount(minCount), maxCount(maxCount), varOp(varOp), -matcherName(matcherName) {} - - VariantMatcher create(SourceRange nameRange, ArrayRef args, -Diagnostics *error) const override { -if (args.size() < minCount || maxCount < args.size()) { - addError(error, nameRange, ErrorType::RegistryWrongArgCount, - {llvm::Twine("requires between "), llvm::Twine(minCount), -llvm::Twine(" and "), llvm::Twine(maxCount), -llvm::Twine(" args, got "), llvm::Twine(args.size())}); - return VariantMatcher(); -} - -std::vector innerArgs; -for (int64_t i = 0, e = args.size(); i != e; ++i) { - const ParserValue &arg = args[i]; - const VariantValue &value = arg.value; - if (!value.isMatcher()) { -addError(error, arg.range, ErrorType::RegistryWrongArgType, - {llvm::Twine(i + 1), llvm::Twine("matcher: "), - llvm::Twine(value.getTypeAsString())}); -return VariantMatcher(); - } - innerArgs.push_back(value.getMatcher()); -} -return VariantMatcher::VariadicOperatorMatcher(varOp, std::move(innerArgs)); - } - - bool isVariadic() const override { return true; } - - unsigned getNumArgs() const override { return 0; } - - void 
getArgKinds(unsigned argNo, std::vector &kinds) const override { -kinds.push_back(ArgKind(ArgKind::Matcher)); - } - -private: - const unsigned minCount; - const unsigned maxCount; - const VarOp varOp; - const StringRef matcherName; -}; - // Helper function to check if argument count matches expected count inline bool checkArgCount(SourceRange nameRange, size_t expectedArgCount, llvm::ArrayRef args, @@ -277,14 +224,6 @@ makeMatcherAutoMarshall(ReturnType (*matcherFunc)(ArgTypes...), reinterpret_cast(matcherFunc), matcherName, argKinds); } -// Variadic operator overload. -template -std::unique_ptr -makeMatcherAutoMarshall(VariadicOperatorMatcherFunc func, -StringRef matcherName) { - return std::make_unique( - MinCount, MaxCount, func.varOp, matcherName); -} } // namespace mlir::query::matcher::internal #endif // MLIR_TOOLS_MLIRQUERY_MATCHER_MARSHALLERS_H
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/jh7370 approved this pull request. LGTM, thanks. Probably want to wait for @MaskRay for final sign-off, but if he's too busy, land it in a couple of days anyway. https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Cleanup darwin bzero configuration (PR #145639)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: Matt Arsenault (arsenm) Changes Write this in a more predicate-apply style instead of the switch. --- Full diff: https://github.com/llvm/llvm-project/pull/145639.diff 1 Files Affected: - (modified) llvm/lib/IR/RuntimeLibcalls.cpp (+2-10) ``diff diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index cb8c8457f5a47..5c01d8595d0f9 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -432,19 +432,11 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); // Some darwins have an optimized __bzero/bzero function. -switch (TT.getArch()) { -case Triple::x86: -case Triple::x86_64: +if (TT.isX86()) { if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6)) setLibcallName(RTLIB::BZERO, "__bzero"); - break; -case Triple::aarch64: -case Triple::aarch64_32: +} else if (TT.isAArch64()) setLibcallName(RTLIB::BZERO, "bzero"); - break; -default: - break; -} if (darwinHasSinCosStret(TT)) { setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret"); `` https://github.com/llvm/llvm-project/pull/145639 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Cleanup darwin bzero configuration (PR #145639)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/145639 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use reverse iteration in CodeGenPrepare (PR #145484)
@@ -2160,7 +2160,22 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-GOODFREXP-NEXT:[[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i64 0 ; IEEE-GOODFREXP-NEXT:[[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 ; IEEE-GOODFREXP-NEXT:store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT:[[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]] +; IEEE-GOODFREXP-NEXT:[[TMP56:%.*]] = extractelement <2 x float> [[X]], i64 0 Pierre-vh wrote: I don't understand why this changed that much, and whether it's a good thing or not I only observed this change and there's no similar codegen regression, so I assume it's a neutral change? https://github.com/llvm/llvm-project/pull/145484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AlwaysInliner: A new inlining algorithm to interleave alloca promotion with inlines. (PR #145613)
@@ -129,6 +147,245 @@ bool AlwaysInlineImpl( return Changed; } +/// Promote allocas to registers if possible. +static void promoteAllocas( +Function *Caller, SmallPtrSetImpl &AllocasToPromote, +function_ref &GetAssumptionCache) { + if (AllocasToPromote.empty()) +return; + + SmallVector PromotableAllocas; + llvm::copy_if(AllocasToPromote, std::back_inserter(PromotableAllocas), +isAllocaPromotable); + if (PromotableAllocas.empty()) +return; + + DominatorTree DT(*Caller); + AssumptionCache &AC = GetAssumptionCache(*Caller); + PromoteMemToReg(PromotableAllocas, DT, &AC); + NumAllocasPromoted += PromotableAllocas.size(); + // Emit a remark for the promotion. + OptimizationRemarkEmitter ORE(Caller); + DebugLoc DLoc = Caller->getEntryBlock().getTerminator()->getDebugLoc(); + ORE.emit([&]() { +return OptimizationRemark(DEBUG_TYPE, "PromoteAllocas", DLoc, + &Caller->getEntryBlock()) + << "Promoting " << ore::NV("NumAlloca", PromotableAllocas.size()) + << " allocas to SSA registers in function '" + << ore::NV("Function", Caller) << "'"; + }); + LLVM_DEBUG(dbgs() << "Promoted " << PromotableAllocas.size() +<< " allocas to registers in function " << Caller->getName() +<< "\n"); +} + +/// We use a different visitation order of functions here to solve a phase +/// ordering problem. After inlining, a caller function may have allocas that +/// were previously used for passing reference arguments to the callee that +/// are now promotable to registers, using SROA/mem2reg. However if we just let +/// the AlwaysInliner continue inlining everything at once, the later SROA pass +/// in the pipeline will end up placing phis for these allocas into blocks along +/// the dominance frontier which may extend further than desired (e.g. loop +/// headers). This can happen when the caller is then inlined into another +/// caller, and the allocas end up hoisted further before SROA is run. +/// +/// Instead what we want is to try to do, as best as we can, is to inline leaf +/// functions into callers, and then run PromoteMemToReg() on the allocas that +/// were passed into the callee before it was inlined. +/// +/// We want to do this *before* the caller is inlined into another caller +/// because we want the alloca promotion to happen before its scope extends too +/// far because of further inlining. +/// +/// Here's a simple pseudo-example: +/// outermost_caller() { +/// for (...) { +/// middle_caller(); +/// } +/// } +/// +/// middle_caller() { +/// int stack_var; +/// inner_callee(&stack_var); +/// } +/// +/// inner_callee(int *x) { +/// // Do something with x. +/// } +/// +/// In this case, we want to inline inner_callee() into middle_caller() and +/// then promote stack_var to a register before we inline middle_caller() into +/// outermost_caller(). The regular always_inliner would inline everything at +/// once, and then SROA/mem2reg would promote stack_var to a register but in +/// the context of outermost_caller() which is not what we want. aemerson wrote: Sure. The problem is that mem2reg promotion has to place phi nodes for the value along the dominance frontier. This frontier is different depending on inlining order. For allocas, what you want is to insert phis when the size of the dominance frontier is as small as possible. The motivation is that allocas inside nested loops can "leak" phis beyond the innermost loop header, and that's bad for register pressure. 
The main inliner already handles this because the pass manager interleaves optimizations with inlining, but for the always-inliner we don't have that capability. https://github.com/llvm/llvm-project/pull/145613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
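To make the dominance-frontier argument concrete, here is a hand-written IR sketch (not taken from the PR) of the state the interleaved approach wants to reach: inner_callee() has been inlined, and the alloca is promoted while it is still confined to straight-line code.

```llvm
; middle_caller() after inner_callee() is inlined but *before* middle_caller()
; is itself inlined into outermost_caller(). The alloca is only accessed in a
; single block, so PromoteMemToReg needs no phi nodes at all.
define void @middle_caller() {
entry:
  %stack_var = alloca i32, align 4
  store i32 0, ptr %stack_var, align 4
  ; body of inner_callee(&stack_var), now inlined:
  %v = load i32, ptr %stack_var, align 4
  %v1 = add i32 %v, 1
  store i32 %v1, ptr %stack_var, align 4
  ret void
}
; If everything is instead inlined into outermost_caller() first, the same
; alloca ends up inside the 'for' loop there, and promotion has to place phi
; nodes along that loop's dominance frontier (e.g. in the loop header),
; extending the value's live range.
```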
[llvm-branch-commits] [llvm] [AMDGPU] Use reverse iteration in CodeGenPrepare (PR #145484)
Pierre-vh wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite. Learn more: https://graphite.dev/docs/merge-pull-requests
* **#145484** 👈 (this PR; view in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/145484)
* **#145483** (https://app.graphite.dev/github/pr/llvm/llvm-project/145483)
* `main`
This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/
https://github.com/llvm/llvm-project/pull/145484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
@@ -0,0 +1,113 @@ +//===- MCGOFFAttributes.h - Attributes of GOFF symbols ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// Defines the various attribute collections defining GOFF symbols. +// +//===--===// + +#ifndef LLVM_MC_MCGOFFATTRIBUTES_H +#define LLVM_MC_MCGOFFATTRIBUTES_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/GOFF.h" + +namespace llvm { +namespace GOFF { +// An "External Symbol Definition" in the GOFF file has a type, and depending on +// the type a different subset of the fields is used. +// +// Unlike other formats, a 2 dimensional structure is used to define the +// location of data. For example, the equivalent of the ELF .text section is +// made up of a Section Definition (SD) and a class (Element Definition; ED). +// The name of the SD symbol depends on the application, while the class has the +// predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively. +// +// Data can be placed into this structure in 2 ways. First, the data (in a text +// record) can be associated with an ED symbol. To refer to data, a Label +// Definition (LD) is used to give an offset into the data a name. When binding, +// the whole data is pulled into the resulting executable, and the addresses +// given by the LD symbols are resolved. +// +// The alternative is to use a Part Definition (PR). In this case, the data (in +// a text record) is associated with the part. When binding, only the data of +// referenced PRs is pulled into the resulting binary. +// +// Both approaches are used, which means that the equivalent of a section in ELF +// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain +// sections are fine with just defining SD/ED symbols. The SymbolMapper takes +// care of all those details. + +// Attributes for SD symbols. +struct SDAttr { + GOFF::ESDTaskingBehavior TaskingBehavior = GOFF::ESD_TA_Unspecified; + GOFF::ESDBindingScope BindingScope = GOFF::ESD_BSC_Unspecified; +}; + +// Attributes for ED symbols. +struct EDAttr { + bool IsReadOnly = false; + GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified; + GOFF::ESDAmode Amode; + GOFF::ESDRmode Rmode; + GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName; + GOFF::ESDTextStyle TextStyle = GOFF::ESD_TS_ByteOriented; + GOFF::ESDBindingAlgorithm BindAlgorithm = GOFF::ESD_BA_Concatenate; + GOFF::ESDLoadingBehavior LoadBehavior = GOFF::ESD_LB_Initial; + GOFF::ESDReserveQwords ReservedQwords = GOFF::ESD_RQ_0; + GOFF::ESDAlignment Alignment = GOFF::ESD_ALIGN_Doubleword; +}; + +// Attributes for LD symbols. +struct LDAttr { + bool IsRenamable = false; + GOFF::ESDExecutable Executable = GOFF::ESD_EXE_Unspecified; + GOFF::ESDNameSpaceId NameSpace = GOFF::ESD_NS_NormalName; + GOFF::ESDBindingStrength BindingStrength = GOFF::ESD_BST_Strong; uweigand wrote: Like above, right now there doesn't appear to be any code to emit LD symbols to asm output. https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
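As a purely illustrative reading of the SD/ED/LD layering described in the MCGOFFAttributes.h comment above, the equivalent of an ELF .text section can be pictured as a triple of ESD symbols. The names in the sketch below are invented examples; only the C_CODE64 class name comes from the comment, and the real names and attributes are chosen by the SymbolMapper and the target.

```cpp
// Illustrative only: one way to picture the three ESD symbols that together
// model a code section in AMODE64. This is not emitted by the actual code.
struct SectionTriple {
  const char *SDName; // Section Definition: name chosen by the application
  const char *EDName; // Element Definition: predefined class, C_CODE64 in AMODE64
  const char *LDName; // Label Definition: names an offset into the element's data
};

static const SectionTriple ExampleTextSection = {
    /*SDName=*/"HELLO#C",  // hypothetical application-chosen name
    /*EDName=*/"C_CODE64", // predefined class name per the comment above
    /*LDName=*/"main",     // hypothetical label at offset 0
};
```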
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/145009 >From db5463b1af5c1c425866979dcf85ee5919c8a75d Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 08:50:34 + Subject: [PATCH 1/5] address comments + add reloctable test Created using spr 1.3.5-bogner --- ...=> elf-executable-symbolize-operands.yaml} | 31 +++- .../elf-relocatable-symbolize-operands.s | 77 +++ 2 files changed, 105 insertions(+), 3 deletions(-) rename llvm/test/tools/llvm-objdump/AArch64/{elf-disassemble-symbololize-operands.yaml => elf-executable-symbolize-operands.yaml} (64%) create mode 100644 llvm/test/tools/llvm-objdump/AArch64/elf-relocatable-symbolize-operands.s diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml similarity index 64% rename from llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml rename to llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml index 3f3c6f33e620f..d318ea01b4c30 100644 --- a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml +++ b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml @@ -1,14 +1,14 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 ## Expect to find the branch labels and global variable name. # CHECK: <_start>: # CHECK-NEXT: ldr x0, # CHECK-NEXT: : -# CHECK-NEXT: adrp x1, 0x{{[68]}}000 +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] # CHECK-NEXT: adr x2, # CHECK-NEXT: cmp x1, x2 # CHECK-NEXT: b.eq @@ -17,6 +17,31 @@ # CHECK-NEXT: cbz x2, # CHECK-NEXT: ret +## Machine code generated with: +# llvm-mc --arch=aarch64 --filetype=obj -o tmp.o <: +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: ret + +.p2align 4 +.global fn1 +fn1: +b 0f +tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop +bl fn2 +bl .Lfn2 +adr x0, 2b +adr x1, fn2 +adr x2, .Lfn2 +ldr w0, 2b +ldr w0, fn2 +ret + +.p2align 4 +.global fn2 +fn2: +.Lfn2: # local label for non-interposable call +bl .Lfn3 +# In future, we might identify the pairs and symbolize the operands properly +adrp x3, fn2 +add x3, x3, :lo12:fn2 +adrp x3, fn2 +ldr x0, [x3, :lo12:fn2] +ret 
+ +.p2align 4 +.Lfn3: # private function +ret >From 1abf014077dd0e7f5592651a51484a544cad1e49 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 09:24:47 + Subject: [PATCH 2/5] move tests to avoid failure if AArch64 is not configured Created using spr 1.3.5-bogner --- .../AArch64/symbolize-operands-executable.yaml} | 0 .../AArch64/symbolize-operands-reloctable.s} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/tools/llvm-objdump/{AArch64/elf-executable-symbolize-operands.yaml => ELF/AArch64/symbolize-operands-executable.yaml} (100%) rename llvm/test/tools/llvm-objdump/{AArch64/elf-relocatable-symbolize-operands.s => ELF/AArch64/symbolize-operands-reloctable.s} (100%) diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml similarity index 100% rename from llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml rename to llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml diff --git a/llv
[llvm-branch-commits] [llvm] ded9781 - Revert "[msan] Automatically print shadow for failing outlined checks (#145107)"
Author: Thurston Dang Date: 2025-06-24T15:52:53-07:00 New Revision: ded97817c5f465aaea9f923d6ec247c8ace0f0c2 URL: https://github.com/llvm/llvm-project/commit/ded97817c5f465aaea9f923d6ec247c8ace0f0c2 DIFF: https://github.com/llvm/llvm-project/commit/ded97817c5f465aaea9f923d6ec247c8ace0f0c2.diff LOG: Revert "[msan] Automatically print shadow for failing outlined checks (#145107)" This reverts commit 1b71ea411a9d36705663b1724ececbdfec7cc98c. Added: Modified: compiler-rt/lib/msan/msan.cpp compiler-rt/lib/msan/msan_interface_internal.h llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp llvm/test/Instrumentation/MemorySanitizer/with-call-type-size.ll Removed: compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index 67879e37fad5e..a3c0c2e485af3 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -352,38 +352,16 @@ void __sanitizer::BufferedStackTrace::UnwindImpl( using namespace __msan; -// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into -// an MSan shadow region. -static void print_shadow_value(void *shadow, u64 size) { - Printf("Shadow value (%llu byte%s):", size, size == 1 ? "" : "s"); - for (unsigned int i = 0; i < size; i++) { -if (i % 4 == 0) - Printf(" "); - -unsigned char x = ((unsigned char *)shadow)[i]; -Printf("%x%x", x >> 4, x & 0xf); - } - Printf("\n"); - Printf( - "Caveat: the shadow value does not necessarily directly correspond to a " - "single user variable. The correspondence is stronger, but not always " - "perfect, when origin tracking is enabled.\n"); - Printf("\n"); -} - -#define MSAN_MAYBE_WARNING(type, size) \ - void __msan_maybe_warning_##size(type s, u32 o) { \ -GET_CALLER_PC_BP;\ - \ -if (UNLIKELY(s)) { \ - if (Verbosity() >= 1) \ -print_shadow_value((void *)(&s), sizeof(s)); \ - PrintWarningWithOrigin(pc, bp, o); \ - if (__msan::flags()->halt_on_error) { \ -Printf("Exiting\n"); \ -Die(); \ - } \ -}\ +#define MSAN_MAYBE_WARNING(type, size) \ + void __msan_maybe_warning_##size(type s, u32 o) { \ +GET_CALLER_PC_BP; \ +if (UNLIKELY(s)) { \ + PrintWarningWithOrigin(pc, bp, o);\ + if (__msan::flags()->halt_on_error) { \ +Printf("Exiting\n");\ +Die(); \ + } \ +} \ } MSAN_MAYBE_WARNING(u8, 1) @@ -391,30 +369,6 @@ MSAN_MAYBE_WARNING(u16, 2) MSAN_MAYBE_WARNING(u32, 4) MSAN_MAYBE_WARNING(u64, 8) -// N.B. Only [shadow, shadow+size) is defined. shadow is *not* a pointer into -// an MSan shadow region. 
-void __msan_maybe_warning_N(void *shadow, u64 size, u32 o) { - GET_CALLER_PC_BP; - - bool allZero = true; - for (unsigned int i = 0; i < size; i++) { -if (((char *)shadow)[i]) { - allZero = false; - break; -} - } - - if (UNLIKELY(!allZero)) { -if (Verbosity() >= 1) - print_shadow_value(shadow, size); -PrintWarningWithOrigin(pc, bp, o); -if (__msan::flags()->halt_on_error) { - Printf("Exiting\n"); - Die(); -} - } -} - #define MSAN_MAYBE_STORE_ORIGIN(type, size) \ void __msan_maybe_store_origin_##size(type s, void *p, u32 o) { \ if (UNLIKELY(s)) {\ diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h index 75425b98166a9..c2eead13c20cf 100644 --- a/compiler-rt/lib/msan/msan_interface_internal.h +++ b/compiler-rt/lib/msan/msan_interface_internal.h @@ -60,8 +60,6 @@ SANITIZER_INTERFACE_ATTRIBUTE void __msan_maybe_warning_4(u32 s, u32 o); SANITIZER_INTERFACE_ATTRIBUTE void __msan_maybe_warning_8(u64 s, u32 o); -SANITIZER_INTERFACE_ATTRIBUTE -void __msan_maybe_warning_N(void *shadow, u64 size, u32 o); SANITIZER_INTERFACE_ATTRIBUTE void __msan_maybe_store_origin_1(u8 s, void *p, u32 o); diff --git a/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp b/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp deleted file mode 100644 index 3ecb1277f23cc..0 --- a/compiler-rt/test/msan/msan_print_shadow_on_outlined_check.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// RUN: %clangxx_msan -fsanitize-recover=memory -mllvm
[llvm-branch-commits] [llvm] RuntimeLibcalls: Cleanup darwin bzero configuration (PR #145639)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/145639 Write this in a more predicate-apply style instead of the switch. >From 2fdd40315244cf87324ec124e232549b5c611e92 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 25 Jun 2025 14:42:24 +0900 Subject: [PATCH] RuntimeLibcalls: Cleanup darwin bzero configuration Write this in a more predicate-apply style instead of the switch. --- llvm/lib/IR/RuntimeLibcalls.cpp | 12 ++-- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index cb8c8457f5a47..5c01d8595d0f9 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -432,19 +432,11 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); // Some darwins have an optimized __bzero/bzero function. -switch (TT.getArch()) { -case Triple::x86: -case Triple::x86_64: +if (TT.isX86()) { if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6)) setLibcallName(RTLIB::BZERO, "__bzero"); - break; -case Triple::aarch64: -case Triple::aarch64_32: +} else if (TT.isAArch64()) setLibcallName(RTLIB::BZERO, "bzero"); - break; -default: - break; -} if (darwinHasSinCosStret(TT)) { setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret"); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Cleanup darwin bzero configuration (PR #145639)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/145639). Learn more: https://graphite.dev/docs/merge-pull-requests * **#145639** 👈 (View in Graphite) * **#145638** * **#145636** * `main` This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/ https://github.com/llvm/llvm-project/pull/145639 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Cleanup darwin exp10 case (PR #145638)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/145638). Learn more: https://graphite.dev/docs/merge-pull-requests * **#145639** * **#145638** 👈 (View in Graphite) * **#145636** * `main` This stack of pull requests is managed by Graphite (https://graphite.dev). Learn more about stacking: https://stacking.dev/ https://github.com/llvm/llvm-project/pull/145638 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Cleanup darwin exp10 case (PR #145638)
llvmbot wrote: @llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-aarch64 Author: Matt Arsenault (arsenm) Changes Add a predicate function following the example of __sincos_stret --- Full diff: https://github.com/llvm/llvm-project/pull/145638.diff 2 Files Affected: - (modified) llvm/include/llvm/IR/RuntimeLibcalls.h (+2) - (modified) llvm/lib/IR/RuntimeLibcalls.cpp (+23-25) ``diff diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 2a095be58a49e..5bd5fd1ce8d3f 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -149,6 +149,8 @@ struct RuntimeLibcallsInfo { return true; } + static bool darwinHasExp10(const Triple &TT); + /// Return true if the target has sincosf/sincos/sincosl functions static bool hasSinCos(const Triple &TT) { return TT.isGNUEnvironment() || TT.isOSFuchsia() || diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index e9cb970f804ca..cb8c8457f5a47 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -457,33 +457,12 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, } } -switch (TT.getOS()) { -case Triple::MacOSX: - if (TT.isMacOSXVersionLT(10, 9)) { -setLibcallName(RTLIB::EXP10_F32, nullptr); -setLibcallName(RTLIB::EXP10_F64, nullptr); - } else { -setLibcallName(RTLIB::EXP10_F32, "__exp10f"); -setLibcallName(RTLIB::EXP10_F64, "__exp10"); - } - break; -case Triple::IOS: - if (TT.isOSVersionLT(7, 0)) { -setLibcallName(RTLIB::EXP10_F32, nullptr); -setLibcallName(RTLIB::EXP10_F64, nullptr); -break; - } - [[fallthrough]]; -case Triple::DriverKit: -case Triple::TvOS: -case Triple::WatchOS: -case Triple::XROS: -case Triple::BridgeOS: +if (darwinHasExp10(TT)) { setLibcallName(RTLIB::EXP10_F32, "__exp10f"); setLibcallName(RTLIB::EXP10_F64, "__exp10"); - break; -default: - break; +} else { + setLibcallName(RTLIB::EXP10_F32, nullptr); + setLibcallName(RTLIB::EXP10_F64, nullptr); } } @@ -662,3 +641,22 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, if (TT.getArch() == Triple::ArchType::msp430) setMSP430Libcalls(*this, TT); } + +bool RuntimeLibcallsInfo::darwinHasExp10(const Triple &TT) { + assert(TT.isOSDarwin() && "should be called with darwin triple"); + + switch (TT.getOS()) { + case Triple::MacOSX: +return !TT.isMacOSXVersionLT(10, 9); + case Triple::IOS: +return !TT.isOSVersionLT(7, 0); + case Triple::DriverKit: + case Triple::TvOS: + case Triple::WatchOS: + case Triple::XROS: + case Triple::BridgeOS: +return true; + default: +return false; + } +} `` https://github.com/llvm/llvm-project/pull/145638 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Cleanup darwin bzero configuration (PR #145639)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir Author: Matt Arsenault (arsenm) Changes Write this in a more predicate-apply style instead of the switch. --- Full diff: https://github.com/llvm/llvm-project/pull/145639.diff 1 Files Affected: - (modified) llvm/lib/IR/RuntimeLibcalls.cpp (+2-10) ``diff diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index cb8c8457f5a47..5c01d8595d0f9 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -432,19 +432,11 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2"); // Some darwins have an optimized __bzero/bzero function. -switch (TT.getArch()) { -case Triple::x86: -case Triple::x86_64: +if (TT.isX86()) { if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6)) setLibcallName(RTLIB::BZERO, "__bzero"); - break; -case Triple::aarch64: -case Triple::aarch64_32: +} else if (TT.isAArch64()) setLibcallName(RTLIB::BZERO, "bzero"); - break; -default: - break; -} if (darwinHasSinCosStret(TT)) { setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret"); `` https://github.com/llvm/llvm-project/pull/145639 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Cleanup darwin exp10 case (PR #145638)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/145638 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Cleanup darwin exp10 case (PR #145638)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/145638 Add a predicate function following the example of __sincos_stret >From 5266f79e89936261972af847253d2b017145e141 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 25 Jun 2025 14:23:21 +0900 Subject: [PATCH] RuntimeLibcalls: Cleanup darwin exp10 case Add a predicate function following the example of __sincos_stret --- llvm/include/llvm/IR/RuntimeLibcalls.h | 2 ++ llvm/lib/IR/RuntimeLibcalls.cpp| 48 -- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.h b/llvm/include/llvm/IR/RuntimeLibcalls.h index 2a095be58a49e..5bd5fd1ce8d3f 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.h +++ b/llvm/include/llvm/IR/RuntimeLibcalls.h @@ -149,6 +149,8 @@ struct RuntimeLibcallsInfo { return true; } + static bool darwinHasExp10(const Triple &TT); + /// Return true if the target has sincosf/sincos/sincosl functions static bool hasSinCos(const Triple &TT) { return TT.isGNUEnvironment() || TT.isOSFuchsia() || diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index e9cb970f804ca..cb8c8457f5a47 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -457,33 +457,12 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, } } -switch (TT.getOS()) { -case Triple::MacOSX: - if (TT.isMacOSXVersionLT(10, 9)) { -setLibcallName(RTLIB::EXP10_F32, nullptr); -setLibcallName(RTLIB::EXP10_F64, nullptr); - } else { -setLibcallName(RTLIB::EXP10_F32, "__exp10f"); -setLibcallName(RTLIB::EXP10_F64, "__exp10"); - } - break; -case Triple::IOS: - if (TT.isOSVersionLT(7, 0)) { -setLibcallName(RTLIB::EXP10_F32, nullptr); -setLibcallName(RTLIB::EXP10_F64, nullptr); -break; - } - [[fallthrough]]; -case Triple::DriverKit: -case Triple::TvOS: -case Triple::WatchOS: -case Triple::XROS: -case Triple::BridgeOS: +if (darwinHasExp10(TT)) { setLibcallName(RTLIB::EXP10_F32, "__exp10f"); setLibcallName(RTLIB::EXP10_F64, "__exp10"); - break; -default: - break; +} else { + setLibcallName(RTLIB::EXP10_F32, nullptr); + setLibcallName(RTLIB::EXP10_F64, nullptr); } } @@ -662,3 +641,22 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT, if (TT.getArch() == Triple::ArchType::msp430) setMSP430Libcalls(*this, TT); } + +bool RuntimeLibcallsInfo::darwinHasExp10(const Triple &TT) { + assert(TT.isOSDarwin() && "should be called with darwin triple"); + + switch (TT.getOS()) { + case Triple::MacOSX: +return !TT.isMacOSXVersionLT(10, 9); + case Triple::IOS: +return !TT.isOSVersionLT(7, 0); + case Triple::DriverKit: + case Triple::TvOS: + case Triple::WatchOS: + case Triple::XROS: + case Triple::BridgeOS: +return true; + default: +return false; + } +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/MaskRay approved this pull request. https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AlwaysInliner: A new inlining algorithm to interleave alloca promotion with inlines. (PR #145613)
https://github.com/aemerson edited https://github.com/llvm/llvm-project/pull/145613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AlwaysInliner: A new inlining algorithm to interleave alloca promotion with inlines. (PR #145613)
https://github.com/aemerson updated https://github.com/llvm/llvm-project/pull/145613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AlwaysInliner: A new inlining algorithm to interleave alloca promotion with inlines. (PR #145613)
https://github.com/aemerson updated https://github.com/llvm/llvm-project/pull/145613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in SelectionDAGAddressAnalysis (PR #142777)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/142777 >From ade6820ff56d755a7155cd170d7b1ebf24bc8aef Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Wed, 4 Jun 2025 09:30:34 -0400 Subject: [PATCH] [AMDGPU][SDAG] Add test for ISD::PTRADD handling in SelectionDAGAddressAnalysis Pre-committing test to show improvements in a follow-up PR. --- .../AMDGPU/ptradd-sdag-optimizations.ll | 28 +++ 1 file changed, 28 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index b78dea1684545..d3242905ada64 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -126,3 +126,31 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) #0 { store volatile i64 %dispatch.id, ptr addrspace(1) %ptr ret void } + +; Taken from memcpy-param-combinations.ll, tests PTRADD handling in +; SelectionDAGAddressAnalysis. +define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { +; GFX942_PTRADD-LABEL: memcpy_p1_p4_sz16_align_1_1: +; GFX942_PTRADD: ; %bb.0: ; %entry +; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_PTRADD-NEXT:global_load_dwordx2 v[4:5], v[2:3], off +; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) +; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[4:5], off +; GFX942_PTRADD-NEXT:global_load_dwordx2 v[2:3], v[2:3], off offset:8 +; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) +; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[2:3], off offset:8 +; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) +; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31] +; +; GFX942_LEGACY-LABEL: memcpy_p1_p4_sz16_align_1_1: +; GFX942_LEGACY: ; %bb.0: ; %entry +; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_LEGACY-NEXT:global_load_dwordx4 v[2:5], v[2:3], off +; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) +; GFX942_LEGACY-NEXT:global_store_dwordx4 v[0:1], v[2:5], off +; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) +; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31] +entry: + tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) + ret void +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines (PR #143673)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/143673 >From 91bd709f8f7b0f8a05578bd1291295b5a8d1b6c1 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Wed, 11 Jun 2025 05:48:45 -0400 Subject: [PATCH] [AMDGPU][SDAG] Add target-specific ISD::PTRADD combines This patch adds several (AMDGPU-)target-specific DAG combines for ISD::PTRADD nodes that reproduce existing similar transforms for ISD::ADD nodes. There is no functional change intended for the existing target-specific PTRADD combine. For SWDEV-516125. --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 139 .../AMDGPU/ptradd-sdag-optimizations.ll | 151 ++ 3 files changed, 160 insertions(+), 134 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 30ee6a99b9dfc..45edcf9992706 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6710,7 +6710,9 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, return SDValue(); int64_t Offset = C2->getSExtValue(); switch (Opcode) { - case ISD::ADD: break; + case ISD::ADD: + case ISD::PTRADD: +break; case ISD::SUB: Offset = -uint64_t(Offset); break; default: return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index b54c6cbb18de4..ec57d231dab5d 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/SDPatternMatch.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -46,6 +47,7 @@ #include using namespace llvm; +using namespace llvm::SDPatternMatch; #define DEBUG_TYPE "si-lower" @@ -14480,7 +14482,7 @@ static SDValue tryFoldMADwithSRL(SelectionDAG &DAG, const SDLoc &SL, // instead of a tree. SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const { - assert(N->getOpcode() == ISD::ADD); + assert(N->isAnyAdd()); SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); @@ -14513,7 +14515,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N, for (SDNode *User : LHS->users()) { // There is a use that does not feed into addition, so the multiply can't // be removed. We prefer MUL + ADD + ADDC over MAD + MUL. 
- if (User->getOpcode() != ISD::ADD) + if (!User->isAnyAdd()) return SDValue(); // We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer @@ -14625,8 +14627,11 @@ SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N, SDValue Hi = getHiHalf64(LHS, DAG); SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32); +unsigned Opcode = N->getOpcode(); +if (Opcode == ISD::PTRADD) + Opcode = ISD::ADD; SDValue AddHi = -DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags()); +DAG.getNode(Opcode, SL, MVT::i32, Hi, ConstHi32, N->getFlags()); SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS); return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi); @@ -15100,42 +15105,116 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); + EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - if (N1.getOpcode() == ISD::ADD) { -// (ptradd x, (add y, z)) -> (ptradd (ptradd x, y), z) if z is a constant, -//y is not, and (add y, z) is used only once. -// (ptradd x, (add y, z)) -> (ptradd (ptradd x, z), y) if y is a constant, -//z is not, and (add y, z) is used only once. -// The goal is to move constant offsets to the outermost ptradd, to create -// more opportunities to fold offsets into memory instructions. -// Together with the generic combines in DAGCombiner.cpp, this also -// implements (ptradd (ptradd x, y), z) -> (ptradd (ptradd x, z), y)). -// -// This transform is here instead of in the general DAGCombiner as it can -// turn in-bounds pointer arithmetic out-of-bounds, which is problematic for -// AArch64's CPA. -SDValue X = N0; -SDValue Y = N1.getOperand(0); -SDValue Z = N1.getOperand(1); -if (N1.hasOneUse()) { - bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y); - bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z); - if (ZIsConstant != YIsConstant) { -// If both additions in the original were NUW, the new ones are as well. -
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines (PR #143672)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/143672 >From 74777534d4ee80311ee342af11b4d5a87564f137 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Wed, 11 Jun 2025 05:14:34 -0400 Subject: [PATCH] [AMDGPU][SDAG] Tests for target-specific ISD::PTRADD combines Pre-committing tests to show improvements in a follow-up PR. --- .../AMDGPU/ptradd-sdag-optimizations.ll | 176 ++ 1 file changed, 176 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index 2e76033a480f4..1ec94162951a6 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -142,3 +142,179 @@ entry: tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) ret void } + +; Test skipping the lower-32-bit addition if it is unnecessary. +define ptr @huge_offset_low_32_unused(ptr %p) { +; GFX942_PTRADD-LABEL: huge_offset_low_32_unused: +; GFX942_PTRADD: ; %bb.0: +; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_PTRADD-NEXT:s_mov_b32 s0, 0 +; GFX942_PTRADD-NEXT:s_mov_b32 s1, 1 +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] +; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31] +; +; GFX942_LEGACY-LABEL: huge_offset_low_32_unused: +; GFX942_LEGACY: ; %bb.0: +; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_LEGACY-NEXT:v_add_u32_e32 v1, 1, v1 +; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31] + %gep = getelementptr inbounds i8, ptr %p, i64 u0x1 + ret ptr %gep +} + +; Reassociate address computation if it leads to more scalar operations. +define amdgpu_kernel void @reassoc_scalar_r(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) { +; GFX942_PTRADD-LABEL: reassoc_scalar_r: +; GFX942_PTRADD: ; %bb.0: ; %entry +; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0 +; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0 +; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0) +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], v[0:1], 0, s[6:7] +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3] +; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1] +; GFX942_PTRADD-NEXT:s_endpgm +; +; GFX942_LEGACY-LABEL: reassoc_scalar_r: +; GFX942_LEGACY: ; %bb.0: ; %entry +; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0 +; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0 +; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0) +; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6 +; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7 +; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1] +; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1] +; GFX942_LEGACY-NEXT:s_endpgm +entry: + %voffset32 = call i32 @llvm.amdgcn.workitem.id.x() + %voffset = zext i32 %voffset32 to i64 + %offset = add nuw nsw i64 %voffset, %soffset + %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset + store ptr addrspace(1) %gep, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @reassoc_scalar_l(ptr addrspace(1) %out, ptr addrspace(1) %p, i64 %soffset) { +; GFX942_PTRADD-LABEL: reassoc_scalar_l: +; GFX942_PTRADD: ; %bb.0: ; %entry +; GFX942_PTRADD-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX942_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0 +; 
GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0 +; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0 +; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0) +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[6:7], 0, v[0:1] +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[2:3] +; GFX942_PTRADD-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1] +; GFX942_PTRADD-NEXT:s_endpgm +; +; GFX942_LEGACY-LABEL: reassoc_scalar_l: +; GFX942_LEGACY: ; %bb.0: ; %entry +; GFX942_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX942_LEGACY-NEXT:s_load_dwordx2 s[6:7], s[4:5], 0x10 +; GFX942_LEGACY-NEXT:v_mov_b32_e32 v1, 0 +; GFX942_LEGACY-NEXT:v_and_b32_e32 v0, 0x3ff, v0 +; GFX942_LEGACY-NEXT:s_waitcnt lgkmcnt(0) +; GFX942_LEGACY-NEXT:s_add_u32 s2, s2, s6 +; GFX942_LEGACY-NEXT:s_addc_u32 s3, s3, s7 +; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[2:3], s[2:3], 0, v[0:1] +; GFX942_LEGACY-NEXT:global_store_dwordx2 v1, v[2:3], s[0:1] +; GFX942_LEGACY-NEXT:s_endpgm +entry: + %voffset32 = call i32 @llvm.amdgcn.workitem.id.x() + %voffset = zext i32 %voffset32 to i64 + %offset = add nuw nsw i64 %soffset, %voffset + %gep = getelementptr i8, ptr addrspace(1) %p, i64 %offset + store ptr addrspace(1) %gep, ptr addrspace
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in SelectionDAGAddressAnalysis (PR #142778)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/142778 >From d6f5395374f9bef864b0442f8fc62ba260910bdd Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Wed, 4 Jun 2025 09:48:02 -0400 Subject: [PATCH] [AMDGPU][SDAG] Handle ISD::PTRADD in SelectionDAGAddressAnalysis This is used in a bunch of memory-related transforms. For SWDEV-516125. --- .../SelectionDAGAddressAnalysis.cpp | 6 ++-- .../AMDGPU/ptradd-sdag-optimizations.ll | 28 ++- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..da92aaa860b2b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -231,6 +231,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, } break; case ISD::ADD: +case ISD::PTRADD: if (auto *C = dyn_cast(Base->getOperand(1))) { Offset += C->getSExtValue(); Base = DAG.getTargetLoweringInfo().unwrapAddress(Base->getOperand(0)); @@ -259,7 +260,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, break; } - if (Base->getOpcode() == ISD::ADD) { + if (Base->isAnyAdd()) { // TODO: The following code appears to be needless as it just // bails on some Ptrs early, reducing the cases where we // find equivalence. We should be able to remove this. @@ -282,8 +283,7 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, } // Check if Index Offset pattern -if (Index->getOpcode() != ISD::ADD || -!isa(Index->getOperand(1))) +if (!Index->isAnyAdd() || !isa(Index->getOperand(1))) return BaseIndexOffset(PotentialBase, Index, Offset, IsIndexSignExt); Offset += cast(Index->getOperand(1))->getSExtValue(); diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index d3242905ada64..2e76033a480f4 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -130,26 +130,14 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) #0 { ; Taken from memcpy-param-combinations.ll, tests PTRADD handling in ; SelectionDAGAddressAnalysis. 
define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { -; GFX942_PTRADD-LABEL: memcpy_p1_p4_sz16_align_1_1: -; GFX942_PTRADD: ; %bb.0: ; %entry -; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942_PTRADD-NEXT:global_load_dwordx2 v[4:5], v[2:3], off -; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) -; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[4:5], off -; GFX942_PTRADD-NEXT:global_load_dwordx2 v[2:3], v[2:3], off offset:8 -; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) -; GFX942_PTRADD-NEXT:global_store_dwordx2 v[0:1], v[2:3], off offset:8 -; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) -; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31] -; -; GFX942_LEGACY-LABEL: memcpy_p1_p4_sz16_align_1_1: -; GFX942_LEGACY: ; %bb.0: ; %entry -; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942_LEGACY-NEXT:global_load_dwordx4 v[2:5], v[2:3], off -; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) -; GFX942_LEGACY-NEXT:global_store_dwordx4 v[0:1], v[2:5], off -; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) -; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31] +; GFX942-LABEL: memcpy_p1_p4_sz16_align_1_1: +; GFX942: ; %bb.0: ; %entry +; GFX942-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT:global_load_dwordx4 v[2:5], v[2:3], off +; GFX942-NEXT:s_waitcnt vmcnt(0) +; GFX942-NEXT:global_store_dwordx4 v[0:1], v[2:5], off +; GFX942-NEXT:s_waitcnt vmcnt(0) +; GFX942-NEXT:s_setpc_b64 s[30:31] entry: tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) ret void ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in various special cases (PR #145330)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/145330 >From 286252e31314dd852ef854918068f1ca0023023c Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Tue, 17 Jun 2025 04:03:53 -0400 Subject: [PATCH] [AMDGPU][SDAG] Handle ISD::PTRADD in various special cases There are more places in SIISelLowering.cpp and AMDGPUISelDAGToDAG.cpp that check for ISD::ADD in a pointer context, but as far as I can tell those are only relevant for 32-bit pointer arithmetic (like frame indices/scratch addresses and LDS), for which we don't enable PTRADD generation yet. For SWDEV-516125. --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- .../CodeGen/SelectionDAG/TargetLowering.cpp | 21 +- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 +- llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll | 67 ++ .../AMDGPU/ptradd-sdag-optimizations.ll | 196 ++ 6 files changed, 105 insertions(+), 194 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 45edcf9992706..efe4639535536 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8219,7 +8219,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) G = cast(Src); - else if (Src.getOpcode() == ISD::ADD && + else if (Src->isAnyAdd() && Src.getOperand(0).getOpcode() == ISD::GlobalAddress && Src.getOperand(1).getOpcode() == ISD::Constant) { G = cast(Src.getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 66717135c9adf..63ca47bb119e5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -615,8 +615,14 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, // operands on the new node are also disjoint. SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint : SDNodeFlags::None); + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::PTRADD) { +// It isn't a ptradd anymore if it doesn't operate on the entire +// pointer. +Opcode = ISD::ADD; + } SDValue X = DAG.getNode( - Op.getOpcode(), dl, SmallVT, + Opcode, dl, SmallVT, DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags); assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?"); @@ -2851,6 +2857,11 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, And1); } [[fallthrough]]; + case ISD::PTRADD: +if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType()) + break; +// PTRADD behaves like ADD if pointers are represented as integers. +[[fallthrough]]; case ISD::ADD: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that @@ -2960,10 +2971,10 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getOpcode() == ISD::MUL) { Known = KnownBits::mul(KnownOp0, KnownOp1); -} else { // Op.getOpcode() is either ISD::ADD or ISD::SUB. +} else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB. 
Known = KnownBits::computeForAddSub( - Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(), - Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1); + Op->isAnyAdd(), Flags.hasNoSignedWrap(), Flags.hasNoUnsignedWrap(), + KnownOp0, KnownOp1); } break; } @@ -5593,7 +5604,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA, return true; } - if (N->getOpcode() == ISD::ADD) { + if (N->isAnyAdd()) { SDValue N1 = N->getOperand(0); SDValue N2 = N->getOperand(1); if (isGAPlusOffset(N1.getNode(), GA, Offset)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 6e990cb2e160c..ee73ad5dda945 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1449,7 +1449,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr, C1 = nullptr; } - if (N0.getOpcode() == ISD::ADD) { + if (N0->isAnyAdd()) { // (add N2, N3) -> addr64, or // (add (add N2, N3), C1) -> addr64 SDValue N2 = N0.getOperand(0); @@ -1899,7 +1899,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, } // Match the variable offset. - if (Addr.getOpcode() == ISD::ADD) { + if (Addr->isAnyAdd()) { LHS = Addr.getOperand(0); RHS = Addr.getOperand(1); @@
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -0,0 +1,77 @@ +# RUN: llvm-mc --triple=aarch64-elf --filetype=obj < %s | llvm-objdump -d -r --symbolize-operands --no-show-raw-insn --no-leading-addr - | FileCheck %s --match-full-lines + +# CHECK: : +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: ret + +.p2align 4 +.global fn1 +fn1: +b 0f +tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop +bl fn2 +bl .Lfn2 +adr x0, 2b +adr x1, fn2 +adr x2, .Lfn2 +ldr w0, 2b +ldr w0, fn2 +ret + +.p2align 4 +.global fn2 +fn2: +.Lfn2: # local label for non-interposable call +bl .Lfn3 +# In future, we might identify the pairs and symbolize the operands properly jh7370 wrote: ```suggestion ## In future, we might identify the pairs and symbolize the operands properly. ``` https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -1813,6 +1817,12 @@ void AArch64InstPrinter::printAdrAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { + // Do not print the numberic target address when symbolizing. jh7370 wrote: ```suggestion // Do not print the numeric target address when symbolizing. ``` https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/jh7370 edited https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -0,0 +1,77 @@ +# RUN: llvm-mc --triple=aarch64-elf --filetype=obj < %s | llvm-objdump -d -r --symbolize-operands --no-show-raw-insn --no-leading-addr - | FileCheck %s --match-full-lines + +# CHECK: : +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: ret + +.p2align 4 +.global fn1 +fn1: +b 0f +tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop +bl fn2 +bl .Lfn2 +adr x0, 2b +adr x1, fn2 +adr x2, .Lfn2 +ldr w0, 2b +ldr w0, fn2 +ret + +.p2align 4 +.global fn2 +fn2: +.Lfn2: # local label for non-interposable call +bl .Lfn3 +# In future, we might identify the pairs and symbolize the operands properly +adrp x3, fn2 +add x3, x3, :lo12:fn2 +adrp x3, fn2 +ldr x0, [x3, :lo12:fn2] +ret + +.p2align 4 +.Lfn3: # private function jh7370 wrote: ```suggestion .Lfn3: ## Private function ``` https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -1784,6 +1784,10 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { + // Do not print the numberic target address when symbolizing. jh7370 wrote: ```suggestion // Do not print the numeric target address when symbolizing. ``` https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -0,0 +1,77 @@ +# RUN: llvm-mc --triple=aarch64-elf --filetype=obj < %s | llvm-objdump -d -r --symbolize-operands --no-show-raw-insn --no-leading-addr - | FileCheck %s --match-full-lines jh7370 wrote: Nit: let's split this line over multiple lines for readability. ```suggestion # RUN: llvm-mc --triple=aarch64-elf --filetype=obj < %s | \ # RUN: llvm-objdump -d -r --symbolize-operands --no-show-raw-insn --no-leading-addr - | \ # RUN: FileCheck %s --match-full-lines ``` https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -0,0 +1,77 @@ +# RUN: llvm-mc --triple=aarch64-elf --filetype=obj < %s | llvm-objdump -d -r --symbolize-operands --no-show-raw-insn --no-leading-addr - | FileCheck %s --match-full-lines + +# CHECK: : +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: ret + +.p2align 4 +.global fn1 +fn1: +b 0f +tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop +bl fn2 +bl .Lfn2 +adr x0, 2b +adr x1, fn2 +adr x2, .Lfn2 +ldr w0, 2b +ldr w0, fn2 +ret + +.p2align 4 +.global fn2 +fn2: +.Lfn2: # local label for non-interposable call jh7370 wrote: ```suggestion .Lfn2: ## Local label for non-interposable call. ``` https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/jh7370 commented: Basically looks good. Just some typos and other nits. Also, the documentation for the option says it works only for PPC and X86. Please could you update this and double-check whether the command-line help has the same note. https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Move S_BFE lowering into RegBankCombiner (PR #141589)
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/141589 >From 76be3031e6f1195263d63fd09d2f0087a7f5e853 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Tue, 27 May 2025 11:16:16 +0200 Subject: [PATCH 1/2] [AMDGPU] Move S_BFE lowering into RegBankCombiner --- llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 14 +- .../Target/AMDGPU/AMDGPURegBankCombiner.cpp | 51 +++ .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 125 -- 3 files changed, 119 insertions(+), 71 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index 9587fad1ecd63..94e1175b06b14 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -151,6 +151,17 @@ def zext_of_shift_amount_combines : GICombineGroup<[ canonicalize_zext_lshr, canonicalize_zext_ashr, canonicalize_zext_shl ]>; +// Early select of uniform BFX into S_BFE instructions. +// These instructions encode the offset/width in a way that requires using +// bitwise operations. Selecting these instructions early allow the combiner +// to potentially fold these. +class lower_uniform_bfx : GICombineRule< + (defs root:$bfx), + (combine (bfx $dst, $src, $o, $w):$bfx, [{ return lowerUniformBFX(*${bfx}); }])>; + +def lower_uniform_sbfx : lower_uniform_bfx; +def lower_uniform_ubfx : lower_uniform_bfx; + let Predicates = [Has16BitInsts, NotHasMed3_16] in { // For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This // saves one instruction compared to the promotion. @@ -198,5 +209,6 @@ def AMDGPURegBankCombiner : GICombiner< zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain, fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp, identity_combines, redundant_and, constant_fold_cast_op, - cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines]> { + cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines, + lower_uniform_sbfx, lower_uniform_ubfx]> { } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp index ee324a5e93f0f..2100900bb8eb2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -89,6 +89,8 @@ class AMDGPURegBankCombinerImpl : public Combiner { void applyCanonicalizeZextShiftAmt(MachineInstr &MI, MachineInstr &Ext) const; + bool lowerUniformBFX(MachineInstr &MI) const; + private: SIModeRegisterDefaults getMode() const; bool getIEEE() const; @@ -392,6 +394,55 @@ void AMDGPURegBankCombinerImpl::applyCanonicalizeZextShiftAmt( MI.eraseFromParent(); } +bool AMDGPURegBankCombinerImpl::lowerUniformBFX(MachineInstr &MI) const { + assert(MI.getOpcode() == TargetOpcode::G_UBFX || + MI.getOpcode() == TargetOpcode::G_SBFX); + const bool Signed = (MI.getOpcode() == TargetOpcode::G_SBFX); + + Register DstReg = MI.getOperand(0).getReg(); + const RegisterBank *RB = RBI.getRegBank(DstReg, MRI, TRI); + assert(RB && "No RB?"); + if (RB->getID() != AMDGPU::SGPRRegBankID) +return false; + + Register SrcReg = MI.getOperand(1).getReg(); + Register OffsetReg = MI.getOperand(2).getReg(); + Register WidthReg = MI.getOperand(3).getReg(); + + const LLT S32 = LLT::scalar(32); + LLT Ty = MRI.getType(DstReg); + + const unsigned Opc = (Ty == S32) + ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) + : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64); + + // Ensure the high bits are clear to insert the offset. 
+ auto OffsetMask = B.buildConstant(S32, maskTrailingOnes(6)); + auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask); + + // Zeros out the low bits, so don't bother clamping the input value. + auto ShiftAmt = B.buildConstant(S32, 16); + auto ShiftWidth = B.buildShl(S32, WidthReg, ShiftAmt); + + // Transformation function, pack the offset and width of a BFE into + // the format expected by the S_BFE_I32 / S_BFE_U32. In the second + // source, bits [5:0] contain the offset and bits [22:16] the width. + auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth); + + MRI.setRegBank(OffsetMask.getReg(0), *RB); + MRI.setRegBank(ClampOffset.getReg(0), *RB); + MRI.setRegBank(ShiftAmt.getReg(0), *RB); + MRI.setRegBank(ShiftWidth.getReg(0), *RB); + MRI.setRegBank(MergedInputs.getReg(0), *RB); + + auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs}); + if (!constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI)) +llvm_unreachable("failed to constrain BFE"); + + MI.eraseFromParent(); + return true; +} + SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const { return MF.getInfo()->getMode(); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index a7b08794fdf1b..764bed7829693 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/li
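The comment in the diff above describes the S_BFE operand encoding: the field offset goes in bits [5:0] and the field width in bits [22:16] of the second source operand. A minimal stand-alone sketch of that packing follows; the helper name `packBFEOperand` and the example values are purely illustrative and are not part of the patch.

```cpp
#include <cassert>
#include <cstdint>

// Pack a bitfield-extract offset/width pair the way lowerUniformBFX does:
// offset in bits [5:0], width in bits [22:16] of the merged operand.
constexpr uint32_t packBFEOperand(uint32_t Offset, uint32_t Width) {
  return (Offset & 0x3f) | (Width << 16);
}

int main() {
  // Extract an 8-bit field starting at bit 4: offset = 4, width = 8.
  assert(packBFEOperand(4, 8) == 0x00080004u);
  return 0;
}
```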
[llvm-branch-commits] [llvm] [AMDGPU] Add KnownBits simplification combines to RegBankCombiner (PR #141591)
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/141591 >From 10a9e08e314dcb5a57fd1c6a6e818a4146ed9210 Mon Sep 17 00:00:00 2001 From: pvanhout Date: Tue, 27 May 2025 12:29:02 +0200 Subject: [PATCH 1/2] [AMDGPU] Add KnownBits simplification combines to RegBankCombiner --- llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 3 +- llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll | 59 - .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 61 +++--- .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 63 +++ llvm/test/CodeGen/AMDGPU/div_i128.ll | 30 - llvm/test/CodeGen/AMDGPU/itofp.i128.ll| 11 ++-- llvm/test/CodeGen/AMDGPU/lround.ll| 18 +++--- llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll | 16 + 8 files changed, 104 insertions(+), 157 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index 96be17c487130..df867aaa204b1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -210,5 +210,6 @@ def AMDGPURegBankCombiner : GICombiner< fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp, identity_combines, redundant_and, constant_fold_cast_op, cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines, - lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract]> { + lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract, + known_bits_simplifications]> { } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll index 6baa10bb48621..cc0f45681a3e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll @@ -1744,63 +1744,64 @@ define i65 @v_lshr_i65_33(i65 %value) { ; GFX6-LABEL: v_lshr_i65_33: ; GFX6: ; %bb.0: ; GFX6-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT:v_mov_b32_e32 v3, v1 -; GFX6-NEXT:v_mov_b32_e32 v0, 1 +; GFX6-NEXT:v_mov_b32_e32 v3, 1 +; GFX6-NEXT:v_mov_b32_e32 v4, 0 +; GFX6-NEXT:v_and_b32_e32 v3, 1, v2 +; GFX6-NEXT:v_lshl_b64 v[2:3], v[3:4], 31 +; GFX6-NEXT:v_lshrrev_b32_e32 v0, 1, v1 +; GFX6-NEXT:v_or_b32_e32 v0, v0, v2 ; GFX6-NEXT:v_mov_b32_e32 v1, 0 -; GFX6-NEXT:v_and_b32_e32 v0, 1, v2 -; GFX6-NEXT:v_lshl_b64 v[0:1], v[0:1], 31 -; GFX6-NEXT:v_lshrrev_b32_e32 v2, 1, v3 -; GFX6-NEXT:v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT:v_mov_b32_e32 v2, 0 ; GFX6-NEXT:s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_lshr_i65_33: ; GFX8: ; %bb.0: ; GFX8-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT:v_mov_b32_e32 v3, v1 -; GFX8-NEXT:v_mov_b32_e32 v0, 1 +; GFX8-NEXT:v_mov_b32_e32 v3, 1 +; GFX8-NEXT:v_mov_b32_e32 v4, 0 +; GFX8-NEXT:v_and_b32_e32 v3, 1, v2 +; GFX8-NEXT:v_lshlrev_b64 v[2:3], 31, v[3:4] +; GFX8-NEXT:v_lshrrev_b32_e32 v0, 1, v1 +; GFX8-NEXT:v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT:v_mov_b32_e32 v1, 0 -; GFX8-NEXT:v_and_b32_e32 v0, 1, v2 -; GFX8-NEXT:v_lshlrev_b64 v[0:1], 31, v[0:1] -; GFX8-NEXT:v_lshrrev_b32_e32 v2, 1, v3 -; GFX8-NEXT:v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT:v_mov_b32_e32 v2, 0 ; GFX8-NEXT:s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_lshr_i65_33: ; GFX9: ; %bb.0: ; GFX9-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT:v_mov_b32_e32 v3, v1 -; GFX9-NEXT:v_mov_b32_e32 v0, 1 +; GFX9-NEXT:v_mov_b32_e32 v3, 1 +; GFX9-NEXT:v_mov_b32_e32 v4, 0 +; GFX9-NEXT:v_and_b32_e32 v3, 1, v2 +; GFX9-NEXT:v_lshlrev_b64 v[2:3], 31, v[3:4] +; GFX9-NEXT:v_lshrrev_b32_e32 v0, 1, v1 +; GFX9-NEXT:v_or_b32_e32 v0, v0, v2 ; GFX9-NEXT:v_mov_b32_e32 v1, 0 -; GFX9-NEXT:v_and_b32_e32 v0, 1, v2 -; GFX9-NEXT:v_lshlrev_b64 v[0:1], 31, v[0:1] -; GFX9-NEXT:v_lshrrev_b32_e32 v2, 1, v3 
-; GFX9-NEXT:v_or_b32_e32 v0, v2, v0 ; GFX9-NEXT:v_mov_b32_e32 v2, 0 ; GFX9-NEXT:s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_lshr_i65_33: ; GFX10: ; %bb.0: ; GFX10-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT:v_mov_b32_e32 v3, v1 -; GFX10-NEXT:v_mov_b32_e32 v0, 1 +; GFX10-NEXT:v_mov_b32_e32 v3, 1 +; GFX10-NEXT:v_mov_b32_e32 v4, 0 +; GFX10-NEXT:v_and_b32_e32 v3, 1, v2 +; GFX10-NEXT:v_lshrrev_b32_e32 v0, 1, v1 ; GFX10-NEXT:v_mov_b32_e32 v1, 0 -; GFX10-NEXT:v_and_b32_e32 v0, 1, v2 -; GFX10-NEXT:v_lshrrev_b32_e32 v2, 1, v3 -; GFX10-NEXT:v_lshlrev_b64 v[0:1], 31, v[0:1] -; GFX10-NEXT:v_or_b32_e32 v0, v2, v0 +; GFX10-NEXT:v_lshlrev_b64 v[2:3], 31, v[3:4] +; GFX10-NEXT:v_or_b32_e32 v0, v0, v2 ; GFX10-NEXT:v_mov_b32_e32 v2, 0 ; GFX10-NEXT:s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_lshr_i65_33: ; GFX11: ; %bb.0: ; GFX11-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT:v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, 1 -; GFX11-NEXT:v_dual_mov_b32 v1, 0 :: v_dual_an
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
aengelke wrote: Done. The command line help doesn't give any indication on supported architectures. https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
https://github.com/uweigand commented: Looks generally good to me now, still a few comments inline. https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Add writing of section symbols (PR #133799)
@@ -0,0 +1,106 @@ +//===- MCGOFFAttributes.h - Attributes of GOFF symbols ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// Defines the various attribute collections defining GOFF symbols. +// +//===--===// + +#ifndef LLVM_MC_MCGOFFATTRIBUTES_H +#define LLVM_MC_MCGOFFATTRIBUTES_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/GOFF.h" +#include + +namespace llvm { +namespace GOFF { +// An "External Symbol Definition" in the GOFF file has a type, and depending on +// the type a different subset of the fields is used. +// +// Unlike other formats, a 2 dimensional structure is used to define the +// location of data. For example, the equivalent of the ELF .text section is +// made up of a Section Definition (SD) and a class (Element Definition; ED). +// The name of the SD symbol depends on the application, while the class has the +// predefined name C_CODE/C_CODE64 in AMODE31 and AMODE64 respectively. +// +// Data can be placed into this structure in 2 ways. First, the data (in a text +// record) can be associated with an ED symbol. To refer to data, a Label +// Definition (LD) is used to give an offset into the data a name. When binding, +// the whole data is pulled into the resulting executable, and the addresses +// given by the LD symbols are resolved. +// +// The alternative is to use a Part Definition (PR). In this case, the data (in +// a text record) is associated with the part. When binding, only the data of +// referenced PRs is pulled into the resulting binary. +// +// Both approaches are used, which means that the equivalent of a section in ELF +// results in 3 GOFF symbols, either SD/ED/LD or SD/ED/PR. Moreover, certain +// sections are fine with just defining SD/ED symbols. The SymbolMapper takes +// care of all those details. uweigand wrote: This comment doesn't match the current implementation any more. https://github.com/llvm/llvm-project/pull/133799 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use reverse iteration in CodeGenPrepare (PR #145484)
@@ -285,28 +287,19 @@ bool AMDGPUCodeGenPrepareImpl::run() { BreakPhiNodesCache.clear(); bool MadeChange = false; - Function::iterator NextBB; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; FI = NextBB) { -BasicBlock *BB = &*FI; -NextBB = std::next(FI); - -BasicBlock::iterator Next; -for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; - I = Next) { - Next = std::next(I); - - MadeChange |= visit(*I); - - if (Next != E) { // Control flow changed -BasicBlock *NextInstBB = Next->getParent(); -if (NextInstBB != BB) { - BB = NextInstBB; - E = BB->end(); - FE = F.end(); -} - } + for (BasicBlock &BB : reverse(F)) { +for (Instruction &I : make_early_inc_range(reverse(BB))) { + if (!DeadVals.contains(&I)) +MadeChange |= visit(I); } } + + while (!DeadVals.empty()) { +RecursivelyDeleteTriviallyDeadInstructions( +DeadVals.pop_back_val(), TLI, /*MSSAU*/ nullptr, arsenm wrote: ```suggestion DeadVals.pop_back_val(), TLI, /*MSSAU=*/ nullptr, ``` https://github.com/llvm/llvm-project/pull/145484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
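The diff above replaces the manual iterator bookkeeping with a bottom-up walk that defers deletions until iteration is done. A minimal stand-alone sketch of that scheme is shown below; the function name `runReverseCombine` and the `Visit` callback are illustrative stand-ins, not the PR's exact code.

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Visit every instruction bottom-up (last block first, last instruction
// first), skipping values already queued as dead, then delete the queued
// values once the walk is finished so iterators never touch freed memory.
static bool runReverseCombine(Function &F, const TargetLibraryInfo *TLI,
                              SetVector<Value *> &DeadVals,
                              function_ref<bool(Instruction &)> Visit) {
  bool MadeChange = false;
  for (BasicBlock &BB : reverse(F))
    for (Instruction &I : make_early_inc_range(reverse(BB)))
      if (!DeadVals.contains(&I))
        MadeChange |= Visit(I);

  // Deferred cleanup of everything recorded as dead during visitation.
  while (!DeadVals.empty())
    RecursivelyDeleteTriviallyDeadInstructions(DeadVals.pop_back_val(), TLI,
                                               /*MSSAU=*/nullptr);
  return MadeChange;
}
```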
[llvm-branch-commits] [llvm] [AMDGPU] Use reverse iteration in CodeGenPrepare (PR #145484)
https://github.com/Pierre-vh ready_for_review https://github.com/llvm/llvm-project/pull/145484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use reverse iteration in CodeGenPrepare (PR #145484)
@@ -1634,29 +1634,18 @@ define float @v_recip_sqrt_f32_ulp25_contract(float %x) { ; IR-IEEE-SDAG-LABEL: v_recip_sqrt_f32_ulp25_contract: ; IR-IEEE-SDAG: ; %bb.0: ; IR-IEEE-SDAG-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; IR-IEEE-SDAG-NEXT:s_mov_b32 s4, 0xf80 -; IR-IEEE-SDAG-NEXT:v_mul_f32_e32 v1, 0x4f80, v0 +; IR-IEEE-SDAG-NEXT:s_mov_b32 s4, 0x80 Pierre-vh wrote: This file changed after I deleted the hack around forward iteration https://github.com/llvm/llvm-project/pull/145484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use reverse iteration in CodeGenPrepare (PR #145484)
@@ -109,6 +110,7 @@ class AMDGPUCodeGenPrepareImpl bool FlowChanged = false; mutable Function *SqrtF32 = nullptr; mutable Function *LdexpF32 = nullptr; + mutable SetVector DeadVals; arsenm wrote: I don't think this needs to be a set, the iteration shouldn't revisit the same instruction twice (at least other IR combiner passes seem to all use vectors) https://github.com/llvm/llvm-project/pull/145484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GOFF] Emit symbols for functions. (PR #144437)
https://github.com/uweigand commented: There doesn't appear to be any asm output for these? https://github.com/llvm/llvm-project/pull/144437 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in various special cases (PR #145329)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/145329 >From 7c9ac3cdc769e989d87296bfa8998434d6611045 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Tue, 17 Jun 2025 03:51:19 -0400 Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in various special cases Pre-committing tests to show improvements in a follow-up PR. --- llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll | 63 ++ .../AMDGPU/ptradd-sdag-optimizations.ll | 206 ++ 2 files changed, 269 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll new file mode 100644 index 0..fab56383ffa8a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX6,GFX6_PTRADD %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX6,GFX6_LEGACY %s + +; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF. + +define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; GFX6_PTRADD-LABEL: v_add_i32: +; GFX6_PTRADD: ; %bb.0: +; GFX6_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[8:9], 0x0 +; GFX6_PTRADD-NEXT:v_lshlrev_b32_e32 v0, 2, v0 +; GFX6_PTRADD-NEXT:s_mov_b32 s7, 0x100f000 +; GFX6_PTRADD-NEXT:s_mov_b32 s10, 0 +; GFX6_PTRADD-NEXT:s_mov_b32 s11, s7 +; GFX6_PTRADD-NEXT:s_waitcnt lgkmcnt(0) +; GFX6_PTRADD-NEXT:v_mov_b32_e32 v1, s3 +; GFX6_PTRADD-NEXT:v_add_i32_e32 v0, vcc, s2, v0 +; GFX6_PTRADD-NEXT:v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX6_PTRADD-NEXT:s_mov_b32 s8, s10 +; GFX6_PTRADD-NEXT:s_mov_b32 s9, s10 +; GFX6_PTRADD-NEXT:buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc +; GFX6_PTRADD-NEXT:s_waitcnt vmcnt(0) +; GFX6_PTRADD-NEXT:buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc +; GFX6_PTRADD-NEXT:s_waitcnt vmcnt(0) +; GFX6_PTRADD-NEXT:s_mov_b32 s6, -1 +; GFX6_PTRADD-NEXT:s_mov_b32 s4, s0 +; GFX6_PTRADD-NEXT:s_mov_b32 s5, s1 +; GFX6_PTRADD-NEXT:v_add_i32_e32 v0, vcc, v2, v0 +; GFX6_PTRADD-NEXT:buffer_store_dword v0, off, s[4:7], 0 +; GFX6_PTRADD-NEXT:s_endpgm +; +; GFX6_LEGACY-LABEL: v_add_i32: +; GFX6_LEGACY: ; %bb.0: +; GFX6_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[8:9], 0x0 +; GFX6_LEGACY-NEXT:s_mov_b32 s7, 0x100f000 +; GFX6_LEGACY-NEXT:s_mov_b32 s10, 0 +; GFX6_LEGACY-NEXT:s_mov_b32 s11, s7 +; GFX6_LEGACY-NEXT:v_lshlrev_b32_e32 v0, 2, v0 +; GFX6_LEGACY-NEXT:s_waitcnt lgkmcnt(0) +; GFX6_LEGACY-NEXT:s_mov_b64 s[8:9], s[2:3] +; GFX6_LEGACY-NEXT:v_mov_b32_e32 v1, 0 +; GFX6_LEGACY-NEXT:buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc +; GFX6_LEGACY-NEXT:s_waitcnt vmcnt(0) +; GFX6_LEGACY-NEXT:buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc +; GFX6_LEGACY-NEXT:s_waitcnt vmcnt(0) +; GFX6_LEGACY-NEXT:s_mov_b32 s6, -1 +; GFX6_LEGACY-NEXT:s_mov_b32 s4, s0 +; GFX6_LEGACY-NEXT:s_mov_b32 s5, s1 +; GFX6_LEGACY-NEXT:v_add_i32_e32 v0, vcc, v2, v0 +; GFX6_LEGACY-NEXT:buffer_store_dword v0, off, s[4:7], 0 +; GFX6_LEGACY-NEXT:s_endpgm + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid + %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1 + %a = load volatile i32, ptr addrspace(1) %gep + %b = load volatile i32, ptr addrspace(1) %b_ptr + %result = add i32 %a, %b + store i32 %result, ptr addrspace(1) %out + ret 
void +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX6: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index 34bb98550de04..0cd920616c515 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -291,3 +291,209 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) { %gep = getelementptr inbounds i8, ptr %base, i64 %mul ret ptr %gep } + +; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr. +define amdgpu_kernel void @uniform_base_varying_offset_imm(ptr addrspace(1) %p) { +; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm: +; GFX942_PTRADD: ; %bb.0: ; %entry +; GFX942_PTRADD-NEXT:s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0 +; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0 +; GFX942_PTRADD-NEXT:v_lshlrev_b32_e32 v0, 2, v0 +; GFX942_PTRADD-NEXT:v_mov_b32_e32 v2, 1 +; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0) +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942_PTRAD
[llvm-branch-commits] [llvm] [AMDGPU] Use reverse iteration in CodeGenPrepare (PR #145484)
jayfoad wrote: I don't understand the high-level motivation here. "Normal" combining/simplification order is to visit the operands of an instruction before you visit the instruction itself. That way the "visit" function can assume that the operands have already been simplified. GlobalISel combines already work this way, and a lot of effort has been put into trying to make the SelectionDAG combiner work this way too. https://github.com/llvm/llvm-project/pull/145484 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
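For contrast with the reverse walk used by this PR, here is a tiny sketch of the operand-before-user order described in the comment above. It is illustrative only; `Visit` stands in for whatever per-instruction simplification a pass performs. Within a single block, SSA definitions precede their uses, so a forward walk sees an instruction's in-block operands before the instruction itself.

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// Forward walk: any operand of I defined earlier in BB has already been
// visited (and possibly simplified) by the time I is processed.
static bool forwardSimplify(Function &F,
                            function_ref<bool(Instruction &)> Visit) {
  bool Changed = false;
  for (BasicBlock &BB : F)
    for (Instruction &I : make_early_inc_range(BB))
      Changed |= Visit(I);
  return Changed;
}
```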
[llvm-branch-commits] [llvm] [SelectionDAG] Deal with POISON for INSERT_VECTOR_ELT/INSERT_SUBVECTOR (part 3) (PR #143105)
https://github.com/bjope updated https://github.com/llvm/llvm-project/pull/143105 From c007c75562eea85cfcac523c27599d5293c484fe Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Sat, 31 May 2025 09:37:27 +0200 Subject: [PATCH] [SelectionDAG] Deal with POISON for INSERT_VECTOR_ELT/INSERT_SUBVECTOR (part 3) Target specific patches to avoid regressions seen after "part 1" aiming at fixing github issue #141034. One perhaps controversial change here is that convertToScalableVector now uses POISON instead of UNDEF for any additional elements added when converting to the scalable vector. This can avoid that we end up with things like t31: nxv1f32 = t32: v2f32 = extract_subvector t31, Constant:i64<0> t38: nxv1f32 = insert_subvector undef:nxv1f32, t32, Constant:i64<0> since if we instead try to insert into poison we can just use t31 instead of t38 without the risk that t31 would be more poisonous. --- llvm/include/llvm/CodeGen/SelectionDAG.h | 11 +- .../Target/AArch64/AArch64ISelLowering.cpp| 9 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 18 +-- .../AArch64/sve-fixed-length-fp-vselect.ll| 81 + .../AArch64/sve-fixed-length-frame-offests.ll | 8 +- .../AArch64/sve-fixed-length-int-vselect.ll | 108 ++ .../AArch64/sve-fixed-length-masked-gather.ll | 6 +- ...-streaming-mode-fixed-length-fp-vselect.ll | 21 ...streaming-mode-fixed-length-int-vselect.ll | 28 - .../fixed-vectors-vfw-web-simplification.ll | 90 +-- .../fixed-vectors-vw-web-simplification.ll| 55 +++-- 11 files changed, 93 insertions(+), 342 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index a98e46c587273..3abdafac4b411 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -953,8 +953,17 @@ class SelectionDAG { } /// Insert \p SubVec at the \p Idx element of \p Vec. + /// If \p SkipUndef is true and \p SubVec is UNDEF/POISON, then \p Vec is + /// returned. SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, - unsigned Idx) { + unsigned Idx, bool SkipUndef = false) { +// Skipping insert of UNDEF could result in POISON elements remaining in the +// resulting vector. The SkipUndef is useful in situations when getNode +// can't reason well enough about ignoring the insert, e.g. when having +// scalable vectors and the user of this method knows that the subvector +// being replaced isn't POISON. +if (SkipUndef && SubVec.isUndef()) + return Vec; return getNode(ISD::INSERT_SUBVECTOR, DL, Vec.getValueType(), Vec, SubVec, getVectorIdxConstant(Idx, DL)); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1169efce3123f..ca34a00467053 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15085,11 +15085,14 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, if (PreferDUPAndInsert) { // First, build a constant vector with the common element. -SmallVector Ops(NumElts, Value); +// Make sure to freeze the common element first, since we will use it also +// for indices that should be UNDEF (so we want to avoid making those +// elements more poisonous). +SmallVector Ops(NumElts, DAG.getFreeze(Value)); SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG); // Next, insert the elements that do not match the common value. 
for (unsigned I = 0; I < NumElts; ++I) - if (Op.getOperand(I) != Value) + if (Op.getOperand(I) != Value && !Op.getOperand(I).isUndef()) NewVector = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector, Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64)); @@ -28673,7 +28676,7 @@ static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) { "Expected a fixed length vector operand!"); SDLoc DL(V); SDValue Zero = DAG.getConstant(0, DL, MVT::i64); - return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getPOISON(VT), V, Zero); } // Shrink V so it's just big enough to maintain a VT's worth of data. diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index b8ef221742a26..3d63099f73b6b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2848,7 +2848,7 @@ static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG, assert(V.getValueType().isFixedLengthVector() && "Expected a fixed length vector operand!"); SDLoc DL(V); - return DAG.getInsertSubvector(DL, DAG.getUNDE
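The diff above adds a `SkipUndef` parameter to `SelectionDAG::getInsertSubvector`. Below is a hypothetical call site, assuming the patch is applied; the helper name `insertLowHalf` is made up for illustration and does not come from the patch.

```cpp
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// With SkipUndef set, an undef/poison SubVec makes the helper return Vec
// unchanged instead of creating an INSERT_SUBVECTOR node; callers use this
// when they know the lanes being replaced in Vec are not poison.
static SDValue insertLowHalf(SelectionDAG &DAG, const SDLoc &DL, SDValue Vec,
                             SDValue SubVec) {
  return DAG.getInsertSubvector(DL, Vec, SubVec, /*Idx=*/0,
                                /*SkipUndef=*/true);
}
```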