[llvm-branch-commits] Move relocation specifier constants to AArch64:: (PR #144633)

2025-06-17 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay created 
https://github.com/llvm/llvm-project/pull/144633

Rename these relocation specifier constants, aligning with the naming
convention used by other targets (`S_` instead of `VK_`).

* ELF/COFF: AArch64MCExpr::VK_ => AArch64::S_ (VK_ABS/VK_PAGE_ABS are
  also used by Mach-O as a hack)
* Mach-O: AArch64MCExpr::M_ => AArch64::S_MACHO_
* shared: AArch64MCExpr::None => AArch64::S_None

Apologies for the churn following the recent rename in #132595. This
change ensures consistency after introducing MCSpecifierExpr to replace
MCTargetSpecifier subclasses.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] Move relocation specifier constants to AArch64:: (PR #144633)

2025-06-17 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-bolt

@llvm/pr-subscribers-backend-aarch64

Author: Fangrui Song (MaskRay)


Changes

Rename these relocation specifier constants, aligning with the naming
convention used by other targets (`S_` instead of `VK_`).

* ELF/COFF: AArch64MCExpr::VK_ => AArch64::S_ (VK_ABS/VK_PAGE_ABS are
  also used by Mach-O as a hack)
* Mach-O: AArch64MCExpr::M_ => AArch64::S_MACHO_
* shared: AArch64MCExpr::None => AArch64::S_None

Apologies for the churn following the recent rename in #132595. This
change ensures consistency after introducing MCSpecifierExpr to replace
MCTargetSpecifier subclasses.


---

Patch is 95.55 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/144633.diff


14 Files Affected:

- (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (+11-11) 
- (modified) llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp (+7-7) 
- (modified) llvm/lib/Target/AArch64/AArch64MCInstLower.cpp (+37-38) 
- (modified) llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp (+5-5) 
- (modified) llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp (+124-136) 
- (modified) llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp 
(+10-10) 
- (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp 
(+15-16) 
- (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp 
(+108-112) 
- (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp (+70-70) 
- (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h (+149-3) 
- (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp 
(+10-11) 
- (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h (-144) 
- (modified) llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp 
(+13-13) 
- (modified) 
llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp (+5-5) 


``diff
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp 
b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index e6e0aeba34572..612c1304efd60 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -1081,7 +1081,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 
 if (isADR(Inst) || RelType == ELF::R_AARCH64_ADR_PREL_LO21 ||
 RelType == ELF::R_AARCH64_TLSDESC_ADR_PREL21) {
-  return MCSpecifierExpr::create(Expr, AArch64MCExpr::VK_ABS, Ctx);
+  return MCSpecifierExpr::create(Expr, AArch64::S_ABS, Ctx);
 } else if (isADRP(Inst) || RelType == ELF::R_AARCH64_ADR_PREL_PG_HI21 ||
RelType == ELF::R_AARCH64_ADR_PREL_PG_HI21_NC ||
RelType == ELF::R_AARCH64_TLSDESC_ADR_PAGE21 ||
@@ -1089,7 +1089,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
RelType == ELF::R_AARCH64_ADR_GOT_PAGE) {
   // Never emit a GOT reloc, we handled this in
   // RewriteInstance::readRelocations().
-  return MCSpecifierExpr::create(Expr, AArch64MCExpr::VK_ABS_PAGE, Ctx);
+  return MCSpecifierExpr::create(Expr, AArch64::S_ABS_PAGE, Ctx);
 } else {
   switch (RelType) {
   case ELF::R_AARCH64_ADD_ABS_LO12_NC:
@@ -1103,18 +1103,18 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
   case ELF::R_AARCH64_TLSDESC_LD64_LO12:
   case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
   case ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
-return MCSpecifierExpr::create(Expr, AArch64MCExpr::VK_LO12, Ctx);
+return MCSpecifierExpr::create(Expr, AArch64::S_LO12, Ctx);
   case ELF::R_AARCH64_MOVW_UABS_G3:
-return MCSpecifierExpr::create(Expr, AArch64MCExpr::VK_ABS_G3, Ctx);
+return MCSpecifierExpr::create(Expr, AArch64::S_ABS_G3, Ctx);
   case ELF::R_AARCH64_MOVW_UABS_G2:
   case ELF::R_AARCH64_MOVW_UABS_G2_NC:
-return MCSpecifierExpr::create(Expr, AArch64MCExpr::VK_ABS_G2_NC, Ctx);
+return MCSpecifierExpr::create(Expr, AArch64::S_ABS_G2_NC, Ctx);
   case ELF::R_AARCH64_MOVW_UABS_G1:
   case ELF::R_AARCH64_MOVW_UABS_G1_NC:
-return MCSpecifierExpr::create(Expr, AArch64MCExpr::VK_ABS_G1_NC, Ctx);
+return MCSpecifierExpr::create(Expr, AArch64::S_ABS_G1_NC, Ctx);
   case ELF::R_AARCH64_MOVW_UABS_G0:
   case ELF::R_AARCH64_MOVW_UABS_G0_NC:
-return MCSpecifierExpr::create(Expr, AArch64MCExpr::VK_ABS_G0_NC, Ctx);
+return MCSpecifierExpr::create(Expr, AArch64::S_ABS_G0_NC, Ctx);
   default:
 break;
   }
@@ -2028,7 +2028,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 Inst.setOpcode(AArch64::MOVZXi);
 Inst.addOperand(MCOperand::createReg(AArch64::X16));
 Inst.addOperand(MCOperand::createExpr(
-MCSpecifierExpr::create(Target, AArch64MCExpr::VK_ABS_G3, *Ctx)));
+MCSpecifierExpr::create(Target, AArch64::S_ABS_G3, *Ctx)));
 Inst.addOperand(MCOperand::createImm(0x30));
 Seq.emplace_back(Inst);
 
@@ -2037,7 +2037,7 @@ class AArch64MCPl

[llvm-branch-commits] [llvm] PowerPC: Fix using long double libm functions for f128 intrinsics (PR #144382)

2025-06-17 Thread Nemanja Ivanovic via llvm-branch-commits

https://github.com/nemanjai approved this pull request.

LGTM.
I believe that the finite functions are provided by GLIBC on PPC for F128, but 
perhaps someone from IBM can confirm (@lei137 @w2yehia @RolandF77).

https://github.com/llvm/llvm-project/pull/144382
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] PowerPC: Fix using long double libm functions for f128 intrinsics (PR #144382)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jun 18, 4:19 AM UTC**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/144382).


https://github.com/llvm/llvm-project/pull/144382
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)

2025-06-17 Thread Paul Kirth via llvm-branch-commits

ilovepi wrote:



> Linux CI shows failing but looks like all tests passed despite that.
> 
> Added compound constraint support which just goes through the nested 
> expressions until it reaches the constraint.

I've just been clicking re-run on those when I see it. You may want to file 
a bug about it, or chime in on an existing one.

https://github.com/llvm/llvm-project/pull/144430
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [GOFF] Emit symbols for functions. (PR #144437)

2025-06-17 Thread Kai Nacke via llvm-branch-commits

https://github.com/redstar updated 
https://github.com/llvm/llvm-project/pull/144437

>From 2b99eb315cea3e75a6ecc49571d75d9371669116 Mon Sep 17 00:00:00 2001
From: Kai Nacke 
Date: Mon, 16 Jun 2025 17:27:16 -0400
Subject: [PATCH 1/2] [GOFF] Emit symbols for functions.

A function entry is mapped to an LD symbol with an offset to the beginning of 
the section.
---
 llvm/include/llvm/MC/MCGOFFStreamer.h  |  7 +--
 llvm/include/llvm/MC/MCSymbolGOFF.h| 19 ++-
 llvm/lib/MC/CMakeLists.txt |  1 +
 llvm/lib/MC/GOFFObjectWriter.cpp   |  1 +
 llvm/lib/MC/MCGOFFStreamer.cpp | 58 ++
 llvm/lib/MC/MCSymbolGOFF.cpp   | 38 ++
 llvm/test/CodeGen/SystemZ/zos-section-1.ll | 34 -
 7 files changed, 140 insertions(+), 18 deletions(-)
 create mode 100644 llvm/lib/MC/MCSymbolGOFF.cpp

diff --git a/llvm/include/llvm/MC/MCGOFFStreamer.h 
b/llvm/include/llvm/MC/MCGOFFStreamer.h
index 366d7dc08c679..968bef044d175 100644
--- a/llvm/include/llvm/MC/MCGOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCGOFFStreamer.h
@@ -30,9 +30,10 @@ class MCGOFFStreamer : public MCObjectStreamer {
 
   GOFFObjectWriter &getWriter();
 
-  bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override {
-return false;
-  }
+  void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
+
+  bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
+
   void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
 Align ByteAlignment) override {}
   void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override {}
diff --git a/llvm/include/llvm/MC/MCSymbolGOFF.h 
b/llvm/include/llvm/MC/MCSymbolGOFF.h
index d8c570d2de240..b9295d3690803 100644
--- a/llvm/include/llvm/MC/MCSymbolGOFF.h
+++ b/llvm/include/llvm/MC/MCSymbolGOFF.h
@@ -28,7 +28,10 @@ class MCSymbolGOFF : public MCSymbol {
   GOFF::LDAttr LDAttributes;
 
   enum SymbolFlags : uint16_t {
-SF_LD = 0x01, // LD attributes are set.
+SF_LD = 0x01,   // LD attributes are set.
+// Leave place for EX attributes.
+SF_Hidden = 0x04,   // Symbol is hidden, aka not exported.
+SF_Weak = 0x08, // Symbol is weak.
   };
 
 public:
@@ -39,7 +42,8 @@ class MCSymbolGOFF : public MCSymbol {
 modifyFlags(SF_LD, SF_LD);
 LDAttributes = Attr;
   }
-  GOFF::LDAttr getLDAttributes() const { return LDAttributes; }
+  const GOFF::LDAttr &getLDAttributes() const { return LDAttributes; }
+  GOFF::LDAttr &getLDAttributes() { return LDAttributes; }
   bool hasLDAttributes() const { return getFlags() & SF_LD; }
 
   void setADA(MCSectionGOFF *AssociatedDataArea) {
@@ -48,6 +52,17 @@ class MCSymbolGOFF : public MCSymbol {
   }
   MCSectionGOFF *getADA() const { return ADA; }
 
+  void setHidden(bool Value = true) {
+modifyFlags(Value ? SF_Hidden : 0, SF_Hidden);
+  }
+  bool isHidden() const { return getFlags() & SF_Hidden; }
+  bool isExported() const { return !isHidden(); }
+
+  void setWeak(bool Value = true) { modifyFlags(Value ? SF_Weak : 0, SF_Weak); 
}
+  bool isWeak() const { return getFlags() & SF_Weak; }
+
+  void initAttributes();
+
   static bool classof(const MCSymbol *S) { return S->isGOFF(); }
 };
 } // end namespace llvm
diff --git a/llvm/lib/MC/CMakeLists.txt b/llvm/lib/MC/CMakeLists.txt
index d662c42c522fc..85e857d3fb406 100644
--- a/llvm/lib/MC/CMakeLists.txt
+++ b/llvm/lib/MC/CMakeLists.txt
@@ -55,6 +55,7 @@ add_llvm_component_library(LLVMMC
   MCSubtargetInfo.cpp
   MCSymbol.cpp
   MCSymbolELF.cpp
+  MCSymbolGOFF.cpp
   MCSymbolXCOFF.cpp
   MCTargetOptions.cpp
   MCTargetOptionsCommandFlags.cpp
diff --git a/llvm/lib/MC/GOFFObjectWriter.cpp b/llvm/lib/MC/GOFFObjectWriter.cpp
index 214533b99688e..c7fa2e99f6625 100644
--- a/llvm/lib/MC/GOFFObjectWriter.cpp
+++ b/llvm/lib/MC/GOFFObjectWriter.cpp
@@ -329,6 +329,7 @@ void GOFFWriter::defineLabel(const MCSymbolGOFF &Symbol) {
 Section.getEDAttributes().NameSpace, Symbol.getLDAttributes());
   if (Symbol.getADA())
 LD.ADAEsdId = Symbol.getADA()->getOrdinal();
+  LD.Offset = Asm.getSymbolOffset(Symbol);
   writeSymbol(LD);
 }
 
diff --git a/llvm/lib/MC/MCGOFFStreamer.cpp b/llvm/lib/MC/MCGOFFStreamer.cpp
index b7021915e7b70..451acf3b5d781 100644
--- a/llvm/lib/MC/MCGOFFStreamer.cpp
+++ b/llvm/lib/MC/MCGOFFStreamer.cpp
@@ -15,8 +15,11 @@
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCGOFFObjectWriter.h"
+#include "llvm/MC/MCSymbolGOFF.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Casting.h"
 
 using namespace llvm;
 
@@ -41,6 +44,61 @@ void MCGOFFStreamer::changeSection(MCSection *Section, 
uint32_t Subsection) {
   MCObjectStreamer::changeSection(Section, Subsection);
 }
 
+void MCGOFFStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) {
+  MCObjectStreamer::emitLabel(Symbol, Loc);
+  cast(Symbol)->initAt

[llvm-branch-commits] [llvm] [Remarks] Auto-detect remark parser format (PR #144554)

2025-06-17 Thread Tobias Stadler via llvm-branch-commits

https://github.com/tobias-stadler created 
https://github.com/llvm/llvm-project/pull/144554

Add remark format 'Auto', which performs automatic detection of the remark 
format using the magic numbers at the beginning of the remarks files.

The RemarkLinker already did something similar, so we streamlined this and 
exposed this to llvm-remarkutil.

Depends on #144527

>From a428e237fcc52830549144bf3afdcddb29742b0d Mon Sep 17 00:00:00 2001
From: Tobias Stadler 
Date: Mon, 16 Jun 2025 15:32:15 +0100
Subject: [PATCH] [Remarks] Auto-detect remark parser format

Add remark format 'Auto', which performs automatic detection of the
remark format using the magic numbers at the beginning of the remarks
files.

The RemarkLinker already did something similar, so we streamlined this
and exposed this to llvm-remarkutil.

Depends on #144527
---
 llvm/include/llvm/Remarks/RemarkFormat.h  |  5 -
 llvm/include/llvm/Remarks/RemarkLinker.h  |  5 ++---
 llvm/lib/Remarks/RemarkFormat.cpp | 18 +++-
 llvm/lib/Remarks/RemarkLinker.cpp | 14 +++--
 llvm/lib/Remarks/RemarkParser.cpp | 21 +--
 llvm/lib/Remarks/RemarkSerializer.cpp |  6 --
 .../Inputs/broken-remark-magic.bitstream  |  1 +
 .../llvm-remarkutil/annotation-count.test |  2 ++
 .../broken-bitstream-remark-magic.test|  6 ++
 .../tools/llvm-remarkutil/empty-file.test |  5 +
 .../llvm-remarkutil/instruction-count.test|  4 +++-
 .../llvm-remarkutil/instruction-mix.test  |  4 +++-
 .../size-diff/no-difference.test  |  3 +++
 .../tools/llvm-remarkutil/RemarkUtilHelpers.h |  9 +---
 llvm/unittests/Remarks/RemarksLinkingTest.cpp |  4 +---
 15 files changed, 75 insertions(+), 32 deletions(-)
 create mode 100644 
llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream
 create mode 100644 
llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test

diff --git a/llvm/include/llvm/Remarks/RemarkFormat.h 
b/llvm/include/llvm/Remarks/RemarkFormat.h
index a39a013dcf905..eda201d4ee6f1 100644
--- a/llvm/include/llvm/Remarks/RemarkFormat.h
+++ b/llvm/include/llvm/Remarks/RemarkFormat.h
@@ -23,7 +23,7 @@ namespace remarks {
 constexpr StringLiteral Magic("REMARKS");
 
 /// The format used for serializing/deserializing remarks.
-enum class Format { Unknown, YAML, Bitstream };
+enum class Format { Unknown, Auto, YAML, Bitstream };
 
 /// Parse and validate a string for the remark format.
 LLVM_ABI Expected parseFormat(StringRef FormatStr);
@@ -31,6 +31,9 @@ LLVM_ABI Expected parseFormat(StringRef FormatStr);
 /// Parse and validate a magic number to a remark format.
 LLVM_ABI Expected magicToFormat(StringRef Magic);
 
+/// Detect format based on selected format and magic number
+LLVM_ABI Expected detectFormat(Format Selected, StringRef Magic);
+
 } // end namespace remarks
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/Remarks/RemarkLinker.h 
b/llvm/include/llvm/Remarks/RemarkLinker.h
index 5343c62144708..67208f40592a5 100644
--- a/llvm/include/llvm/Remarks/RemarkLinker.h
+++ b/llvm/include/llvm/Remarks/RemarkLinker.h
@@ -80,13 +80,12 @@ struct RemarkLinker {
   /// \p Buffer.
   /// \p Buffer can be either a standalone remark container or just
   /// metadata. This takes care of uniquing and merging the remarks.
-  LLVM_ABI Error link(StringRef Buffer,
-  std::optional RemarkFormat = std::nullopt);
+  LLVM_ABI Error link(StringRef Buffer, Format RemarkFormat = Format::Auto);
 
   /// Link the remarks found in \p Obj by looking for the right section and
   /// calling the method above.
   LLVM_ABI Error link(const object::ObjectFile &Obj,
-  std::optional RemarkFormat = std::nullopt);
+  Format RemarkFormat = Format::Auto);
 
   /// Serialize the linked remarks to the stream \p OS, using the format \p
   /// RemarkFormat.
diff --git a/llvm/lib/Remarks/RemarkFormat.cpp 
b/llvm/lib/Remarks/RemarkFormat.cpp
index 800f5bffe70da..1c52e352f9392 100644
--- a/llvm/lib/Remarks/RemarkFormat.cpp
+++ b/llvm/lib/Remarks/RemarkFormat.cpp
@@ -42,6 +42,22 @@ Expected llvm::remarks::magicToFormat(StringRef 
MagicStr) {
 
   if (Result == Format::Unknown)
 return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Unknown remark magic: '%s'", MagicStr.data());
+ "Automatic detection of remark format failed. "
+ "Unknown magic number: '%.4s'",
+ MagicStr.data());
   return Result;
 }
+
+Expected llvm::remarks::detectFormat(Format Selected,
+ StringRef MagicStr) {
+  if (Selected == Format::Unknown)
+return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown remark parser format.");
+  if (Selected != Format::Auto)
+return Selected;
+
+  //

[llvm-branch-commits] [lld] [llvm] release/20.x: [lld][WebAssembly] Support for the custom-page-sizes WebAssembly proposal (#128942) (PR #129762)

2025-06-17 Thread Sam Clegg via llvm-branch-commits

sbc100 wrote:

> How is the status on this? Is there any workaround to compile to wasm with 
> memories smaller than 64KiB for embedded systems for demonstration purposes?

Yes, isn't that exactly what https://github.com/llvm/llvm-project/pull/128942 
does? You would link with `-Wl,--page-size=1` and then you could get 
arbitrarily sized memories.

You currently need to use LLVM tip-of-tree to try out this flag.   If this 
backport is landed it would end up in the next 20.X point release.


https://github.com/llvm/llvm-project/pull/129762
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Reduce cost of f64 copysign (PR #141944)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141944

>From 0ddc81d117497e6caea3334f7e62ff1aa62f0e3a Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 15:20:50 +0200
Subject: [PATCH] AMDGPU: Reduce cost of f64 copysign

The real implementation is 1 real instruction plus a constant
materialize. Call that a 1, it's not a real f64 operation.
---
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  | 12 ---
 .../Analysis/CostModel/AMDGPU/copysign.ll | 32 +--
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index b2b25ac66677e..b79c9be3eac93 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -718,9 +718,6 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
 
   MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
 
-  if (SLT == MVT::f64)
-return LT.first * NElts * get64BitInstrCost(CostKind);
-
   if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
@@ -731,6 +728,11 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   switch (ICA.getID()) {
   case Intrinsic::fma:
   case Intrinsic::fmuladd:
+if (SLT == MVT::f64) {
+  InstRate = get64BitInstrCost(CostKind);
+  break;
+}
+
 if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16)
   InstRate = getFullRateInstrCost();
 else {
@@ -741,8 +743,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   case Intrinsic::copysign:
 return NElts * getFullRateInstrCost();
   case Intrinsic::canonicalize: {
-assert(SLT != MVT::f64);
-InstRate = getFullRateInstrCost();
+InstRate =
+SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();
 break;
   }
   case Intrinsic::uadd_sat:
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
index 334bb341a3c3e..5b042a8a04603 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
@@ -245,25 +245,25 @@ define void @copysign_bf16() {
 
 define void @copysign_f64() {
 ; ALL-LABEL: 'copysign_f64'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = 
call double @llvm.copysign.f64(double undef, double undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 
= call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 
= call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 
= call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f64 
= call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v8f64 
= call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: 
%v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x 
double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: 
%v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x 
double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = 
call double @llvm.copysign.f64(double undef, double undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 
= call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 
= call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 
= call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f64 
= call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f64 
= call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9f64 
= call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x 
double> undef

[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141945

>From 63d221ae57f3fb5f2e41bc29ff93338c209ab0fe Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 11:16:22 +0200
Subject: [PATCH] AMDGPU: Move fpenvIEEEMode into TTI

---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 28 ++-
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  | 17 +++
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h |  7 +
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 9be8821d5bf96..d12170a60905b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -60,28 +60,6 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const 
APFloat &Src1,
   return maxnum(Src0, Src1);
 }
 
-enum class KnownIEEEMode { Unknown, On, Off };
-
-/// Return KnownIEEEMode::On if we know if the use context can assume
-/// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
-/// "amdgpu-ieee"="false".
-static KnownIEEEMode fpenvIEEEMode(const Instruction &I,
-   const GCNSubtarget &ST) {
-  if (!ST.hasIEEEMode()) // Only mode on gfx12
-return KnownIEEEMode::On;
-
-  const Function *F = I.getFunction();
-  if (!F)
-return KnownIEEEMode::Unknown;
-
-  Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");
-  if (IEEEAttr.isValid())
-return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off;
-
-  return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off
-   : KnownIEEEMode::On;
-}
-
 // Check if a value can be converted to a 16-bit value without losing
 // precision.
 // The value is expected to be either a float (IsFloat = true) or an unsigned
@@ -1004,7 +982,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
 // TODO: Also can fold to 2 operands with infinities.
 if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) ||
 isa(Src0)) {
-  switch (fpenvIEEEMode(II, *ST)) {
+  switch (fpenvIEEEMode(II)) {
   case KnownIEEEMode::On:
 // TODO: If Src2 is snan, does it need quieting?
 if (ConstSrc0 && ConstSrc0->isSignaling())
@@ -1019,7 +997,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   }
 } else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) ||
isa(Src1)) {
-  switch (fpenvIEEEMode(II, *ST)) {
+  switch (fpenvIEEEMode(II)) {
   case KnownIEEEMode::On:
 // TODO: If Src2 is snan, does it need quieting?
 if (ConstSrc1 && ConstSrc1->isSignaling())
@@ -1035,7 +1013,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   }
 } else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) ||
isa(Src2)) {
-  switch (fpenvIEEEMode(II, *ST)) {
+  switch (fpenvIEEEMode(II)) {
   case KnownIEEEMode::On:
 if (ConstSrc2 && ConstSrc2->isSignaling()) {
   auto *Quieted = ConstantFP::get(II.getType(), 
ConstSrc2->makeQuiet());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index b79c9be3eac93..ce2098a3a19bb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1445,3 +1445,20 @@ void GCNTTIImpl::collectKernelLaunchBounds(
   LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
   LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
 }
+
+GCNTTIImpl::KnownIEEEMode
+GCNTTIImpl::fpenvIEEEMode(const Instruction &I) const {
+  if (!ST->hasIEEEMode()) // Only mode on gfx12
+return KnownIEEEMode::On;
+
+  const Function *F = I.getFunction();
+  if (!F)
+return KnownIEEEMode::Unknown;
+
+  Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");
+  if (IEEEAttr.isValid())
+return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off;
+
+  return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off
+   : KnownIEEEMode::On;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index ec298c7e9631a..0fae301abf532 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -281,6 +281,13 @@ class GCNTTIImpl final : public 
BasicTTIImplBase {
   void collectKernelLaunchBounds(
   const Function &F,
   SmallVectorImpl> &LB) const override;
+
+  enum class KnownIEEEMode { Unknown, On, Off };
+
+  /// Return KnownIEEEMode::On if we know if the use context can assume
+  /// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
+  /// "amdgpu-ieee"="false".
+

[llvm-branch-commits] [llvm] AMDGPU: Add baseline cost model tests for special argument intrinsics (PR #141947)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141947

>From 4497b8d17e9aa65d0eccb13a179a6dfb544d808d Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 15:29:57 +0200
Subject: [PATCH] AMDGPU: Add baseline cost model tests for special argument
 intrinsics

---
 .../AMDGPU/special-argument-intrinsics.ll | 202 ++
 1 file changed, 202 insertions(+)
 create mode 100644 
llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll

diff --git a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
new file mode 100644
index 0..ea045e04310be
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
@@ -0,0 +1,202 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print' 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,UNPACKEDID 
%s
+; RUN: opt -passes='print' 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 < %s | FileCheck 
-check-prefixes=ALL,PACKEDID %s
+
+; RUN: opt -passes='print' -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck 
-check-prefixes=SIZE,SIZE-UNPACKEDID %s
+; RUN: opt -passes='print' -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 < %s | FileCheck 
-check-prefixes=SIZE,SIZE-PACKEDID %s
+
+define i32 @workitem_id_x() {
+; ALL-LABEL: 'workitem_id_x'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
+;
+; SIZE-LABEL: 'workitem_id_x'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
i32 %result
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.x()
+  ret i32 %result
+}
+
+define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) {
+; ALL-LABEL: 'kernel_workitem_id_x'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
+;
+; SIZE-LABEL: 'kernel_workitem_id_x'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.x()
+  store i32 %result, ptr addrspace(1) %ptr
+  ret void
+}
+
+define i32 @workitem_id_y() {
+; ALL-LABEL: 'workitem_id_y'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
+;
+; SIZE-LABEL: 'workitem_id_y'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
i32 %result
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.y()
+  ret i32 %result
+}
+
+define amdgpu_kernel void @kernel_workitem_id_y(ptr addrspace(1) %ptr) {
+; ALL-LABEL: 'kernel_workitem_id_y'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
+;
+; SIZE-LABEL: 'kernel_workitem_id_y'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.y()
+  store i32 %result, ptr addrspace(1) %ptr
+  ret void
+}
+
+define i32 @workitem_id_z() {
+; ALL-LABEL: 'workitem_id_z'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
+;
+; SIZE-LABEL: 'workitem_id_z'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y

[llvm-branch-commits] [llvm] AMDGPU: Fix cost model for 16-bit operations on gfx8 (PR #141943)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141943

>From 7fbe4e233098676cc2af8aaad48a1eb5f8cb360f Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 14:41:33 +0200
Subject: [PATCH] AMDGPU: Fix cost model for 16-bit operations on gfx8

We should only divide the number of pieces to fit the packed instructions
if we actually have pk instructions. This increases the cost of copysign,
but is closer to the current codegen output. It could be much cheaper
than it is now.
---
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  |  2 +-
 .../Analysis/CostModel/AMDGPU/canonicalize.ll | 24 
 .../Analysis/CostModel/AMDGPU/copysign.ll | 28 +--
 .../SLPVectorizer/AMDGPU/slp-v2f16.ll | 12 
 4 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 58bfc0b80b24f..b2b25ac66677e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -721,7 +721,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   if (SLT == MVT::f64)
 return LT.first * NElts * get64BitInstrCost(CostKind);
 
-  if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
+  if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
 
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll
index e162edbf611e2..7ac4db3119210 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll
@@ -22,12 +22,12 @@ define void @canonicalize_f16() {
 ;
 ; GFX8-LABEL: 'canonicalize_f16'
 ; GFX8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = 
call half @llvm.canonicalize.f16(half undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 
= call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 
= call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 
= call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 
= call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: 
%v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: 
%v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 
= call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 
= call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 
= call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 
= call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: 
%v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef)
 ; GFX8-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
 ;
 ; GFX9-LABEL: 'canonicalize_f16'
@@ -62,12 +62,12 @@ define void @canonicalize_f16() {
 ;
 ; GFX8-SIZE-LABEL: 'canonicalize_f16'
 ; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%f16 = call half @llvm.canonicalize.f16(half undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: 
%v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: 
%v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: 
%v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef)
+; GFX8-SIZE-NEXT:

[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141948

>From ed073f0e8a14686e87f580fc859a76f7f0ddf4b2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 15:32:59 +0200
Subject: [PATCH] AMDGPU: Report special input intrinsics as free

---
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  | 23 +++-
 .../AMDGPU/special-argument-intrinsics.ll | 56 +--
 2 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index f3474fcbbfb56..d5a1aaef4ad68 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -704,8 +704,29 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID 
ID) {
 InstructionCost
 GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   TTI::TargetCostKind CostKind) const {
-  if (ICA.getID() == Intrinsic::fabs)
+  switch (ICA.getID()) {
+  case Intrinsic::fabs:
+// Free source modifier in the common case.
+return 0;
+  case Intrinsic::amdgcn_workitem_id_x:
+  case Intrinsic::amdgcn_workitem_id_y:
+  case Intrinsic::amdgcn_workitem_id_z:
+// TODO: If hasPackedTID, or if the calling context is not an entry point
+// there may be a bit instruction.
+return 0;
+  case Intrinsic::amdgcn_workgroup_id_x:
+  case Intrinsic::amdgcn_workgroup_id_y:
+  case Intrinsic::amdgcn_workgroup_id_z:
+  case Intrinsic::amdgcn_lds_kernel_id:
+  case Intrinsic::amdgcn_dispatch_ptr:
+  case Intrinsic::amdgcn_dispatch_id:
+  case Intrinsic::amdgcn_implicitarg_ptr:
+  case Intrinsic::amdgcn_queue_ptr:
+// Read from an argument register.
 return 0;
+  default:
+break;
+  }
 
   if (!intrinsicHasPackedVectorBenefit(ICA.getID()))
 return BaseT::getIntrinsicInstrCost(ICA, CostKind);
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
index ea045e04310be..00dbcff0a021f 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
@@ -7,11 +7,11 @@
 
 define i32 @workitem_id_x() {
 ; ALL-LABEL: 'workitem_id_x'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
 ;
 ; SIZE-LABEL: 'workitem_id_x'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
i32 %result
 ;
   %result = call i32 @llvm.amdgcn.workitem.id.x()
@@ -20,12 +20,12 @@ define i32 @workitem_id_x() {
 
 define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) {
 ; ALL-LABEL: 'kernel_workitem_id_x'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
 ;
 ; SIZE-LABEL: 'kernel_workitem_id_x'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
 ;
@@ -36,11 +36,11 @@ define amdgpu_kernel void @kernel_workitem_id_x(ptr 
addrspace(1) %ptr) {
 
 define i32 @workitem_id_y() {
 ; ALL-LABEL: 'workitem_id_y'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
 ;
 ; SIZE-LABEL: 'workitem_id_y'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y()
 ; SIZE-NEXT:  C

[llvm-branch-commits] [llvm] AMDGPU: Add baseline cost model tests for special argument intrinsics (PR #141947)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141947

>From 5179de7f8a6024d25b5a89deadec96894749287e Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 15:29:57 +0200
Subject: [PATCH] AMDGPU: Add baseline cost model tests for special argument
 intrinsics

---
 .../AMDGPU/special-argument-intrinsics.ll | 202 ++
 1 file changed, 202 insertions(+)
 create mode 100644 
llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll

diff --git a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
new file mode 100644
index 0000000000000..ea045e04310be
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
@@ -0,0 +1,202 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,UNPACKEDID 
%s
+; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 < %s | FileCheck 
-check-prefixes=ALL,PACKEDID %s
+
+; RUN: opt -passes='print<cost-model>' -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck 
-check-prefixes=SIZE,SIZE-UNPACKEDID %s
+; RUN: opt -passes='print<cost-model>' -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 < %s | FileCheck 
-check-prefixes=SIZE,SIZE-PACKEDID %s
+
+define i32 @workitem_id_x() {
+; ALL-LABEL: 'workitem_id_x'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
+;
+; SIZE-LABEL: 'workitem_id_x'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
i32 %result
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.x()
+  ret i32 %result
+}
+
+define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) {
+; ALL-LABEL: 'kernel_workitem_id_x'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
+;
+; SIZE-LABEL: 'kernel_workitem_id_x'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.x()
+  store i32 %result, ptr addrspace(1) %ptr
+  ret void
+}
+
+define i32 @workitem_id_y() {
+; ALL-LABEL: 'workitem_id_y'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
+;
+; SIZE-LABEL: 'workitem_id_y'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
i32 %result
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.y()
+  ret i32 %result
+}
+
+define amdgpu_kernel void @kernel_workitem_id_y(ptr addrspace(1) %ptr) {
+; ALL-LABEL: 'kernel_workitem_id_y'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
+;
+; SIZE-LABEL: 'kernel_workitem_id_y'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.y()
+  store i32 %result, ptr addrspace(1) %ptr
+  ret void
+}
+
+define i32 @workitem_id_z() {
+; ALL-LABEL: 'workitem_id_z'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
+;
+; SIZE-LABEL: 'workitem_id_z'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y

[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141945

>From 99162375bef4a757fc95bfba805c559b7b13fbfe Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 11:16:22 +0200
Subject: [PATCH] AMDGPU: Move fpenvIEEEMode into TTI

---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 28 ++-
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  | 17 +++
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h |  7 +
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 9be8821d5bf96..d12170a60905b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -60,28 +60,6 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const 
APFloat &Src1,
   return maxnum(Src0, Src1);
 }
 
-enum class KnownIEEEMode { Unknown, On, Off };
-
-/// Return KnownIEEEMode::On if we know if the use context can assume
-/// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
-/// "amdgpu-ieee"="false".
-static KnownIEEEMode fpenvIEEEMode(const Instruction &I,
-   const GCNSubtarget &ST) {
-  if (!ST.hasIEEEMode()) // Only mode on gfx12
-return KnownIEEEMode::On;
-
-  const Function *F = I.getFunction();
-  if (!F)
-return KnownIEEEMode::Unknown;
-
-  Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");
-  if (IEEEAttr.isValid())
-return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off;
-
-  return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off
-   : KnownIEEEMode::On;
-}
-
 // Check if a value can be converted to a 16-bit value without losing
 // precision.
 // The value is expected to be either a float (IsFloat = true) or an unsigned
@@ -1004,7 +982,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
 // TODO: Also can fold to 2 operands with infinities.
 if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) ||
 isa<UndefValue>(Src0)) {
-  switch (fpenvIEEEMode(II, *ST)) {
+  switch (fpenvIEEEMode(II)) {
   case KnownIEEEMode::On:
 // TODO: If Src2 is snan, does it need quieting?
 if (ConstSrc0 && ConstSrc0->isSignaling())
@@ -1019,7 +997,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   }
 } else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) ||
isa<UndefValue>(Src1)) {
-  switch (fpenvIEEEMode(II, *ST)) {
+  switch (fpenvIEEEMode(II)) {
   case KnownIEEEMode::On:
 // TODO: If Src2 is snan, does it need quieting?
 if (ConstSrc1 && ConstSrc1->isSignaling())
@@ -1035,7 +1013,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   }
 } else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) ||
isa<UndefValue>(Src2)) {
-  switch (fpenvIEEEMode(II, *ST)) {
+  switch (fpenvIEEEMode(II)) {
   case KnownIEEEMode::On:
 if (ConstSrc2 && ConstSrc2->isSignaling()) {
   auto *Quieted = ConstantFP::get(II.getType(), 
ConstSrc2->makeQuiet());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index b79c9be3eac93..ce2098a3a19bb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1445,3 +1445,20 @@ void GCNTTIImpl::collectKernelLaunchBounds(
   LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
   LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
 }
+
+GCNTTIImpl::KnownIEEEMode
+GCNTTIImpl::fpenvIEEEMode(const Instruction &I) const {
+  if (!ST->hasIEEEMode()) // Only mode on gfx12
+return KnownIEEEMode::On;
+
+  const Function *F = I.getFunction();
+  if (!F)
+return KnownIEEEMode::Unknown;
+
+  Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");
+  if (IEEEAttr.isValid())
+return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off;
+
+  return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off
+   : KnownIEEEMode::On;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index ec298c7e9631a..0fae301abf532 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -281,6 +281,13 @@ class GCNTTIImpl final : public 
BasicTTIImplBase<GCNTTIImpl> {
   void collectKernelLaunchBounds(
   const Function &F,
   SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const override;
+
+  enum class KnownIEEEMode { Unknown, On, Off };
+
+  /// Return KnownIEEEMode::On if we know if the use context can assume
+  /// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
+  /// "amdgpu-ieee"="false".
+

[llvm-branch-commits] [llvm] AMDGPU: Reduce cost of f64 copysign (PR #141944)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141944

>From 641ab37922230a88206b08d07b76df77c9d82512 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 15:20:50 +0200
Subject: [PATCH] AMDGPU: Reduce cost of f64 copysign

The real implementation is 1 real instruction plus a constant
materialize. Call that a 1, it's not a real f64 operation.
---
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  | 12 ---
 .../Analysis/CostModel/AMDGPU/copysign.ll | 32 +--
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index b2b25ac66677e..b79c9be3eac93 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -718,9 +718,6 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
 
   MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
 
-  if (SLT == MVT::f64)
-return LT.first * NElts * get64BitInstrCost(CostKind);
-
   if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
@@ -731,6 +728,11 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   switch (ICA.getID()) {
   case Intrinsic::fma:
   case Intrinsic::fmuladd:
+if (SLT == MVT::f64) {
+  InstRate = get64BitInstrCost(CostKind);
+  break;
+}
+
 if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16)
   InstRate = getFullRateInstrCost();
 else {
@@ -741,8 +743,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   case Intrinsic::copysign:
 return NElts * getFullRateInstrCost();
   case Intrinsic::canonicalize: {
-assert(SLT != MVT::f64);
-InstRate = getFullRateInstrCost();
+InstRate =
+SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();
 break;
   }
   case Intrinsic::uadd_sat:
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
index 334bb341a3c3e..5b042a8a04603 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
@@ -245,25 +245,25 @@ define void @copysign_bf16() {
 
 define void @copysign_f64() {
 ; ALL-LABEL: 'copysign_f64'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = 
call double @llvm.copysign.f64(double undef, double undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 
= call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 
= call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 
= call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f64 
= call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v8f64 
= call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: 
%v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x 
double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: 
%v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x 
double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = 
call double @llvm.copysign.f64(double undef, double undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 
= call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 
= call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 
= call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f64 
= call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f64 
= call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9f64 
= call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x 
double> undef

[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141948

>From ed073f0e8a14686e87f580fc859a76f7f0ddf4b2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 15:32:59 +0200
Subject: [PATCH] AMDGPU: Report special input intrinsics as free

---
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  | 23 +++-
 .../AMDGPU/special-argument-intrinsics.ll | 56 +--
 2 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index f3474fcbbfb56..d5a1aaef4ad68 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -704,8 +704,29 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID 
ID) {
 InstructionCost
 GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   TTI::TargetCostKind CostKind) const {
-  if (ICA.getID() == Intrinsic::fabs)
+  switch (ICA.getID()) {
+  case Intrinsic::fabs:
+// Free source modifier in the common case.
+return 0;
+  case Intrinsic::amdgcn_workitem_id_x:
+  case Intrinsic::amdgcn_workitem_id_y:
+  case Intrinsic::amdgcn_workitem_id_z:
+// TODO: If hasPackedTID, or if the calling context is not an entry point
+// there may be a bit instruction.
+return 0;
+  case Intrinsic::amdgcn_workgroup_id_x:
+  case Intrinsic::amdgcn_workgroup_id_y:
+  case Intrinsic::amdgcn_workgroup_id_z:
+  case Intrinsic::amdgcn_lds_kernel_id:
+  case Intrinsic::amdgcn_dispatch_ptr:
+  case Intrinsic::amdgcn_dispatch_id:
+  case Intrinsic::amdgcn_implicitarg_ptr:
+  case Intrinsic::amdgcn_queue_ptr:
+// Read from an argument register.
 return 0;
+  default:
+break;
+  }
 
   if (!intrinsicHasPackedVectorBenefit(ICA.getID()))
 return BaseT::getIntrinsicInstrCost(ICA, CostKind);
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
index ea045e04310be..00dbcff0a021f 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
@@ -7,11 +7,11 @@
 
 define i32 @workitem_id_x() {
 ; ALL-LABEL: 'workitem_id_x'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
 ;
 ; SIZE-LABEL: 'workitem_id_x'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
i32 %result
 ;
   %result = call i32 @llvm.amdgcn.workitem.id.x()
@@ -20,12 +20,12 @@ define i32 @workitem_id_x() {
 
 define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) {
 ; ALL-LABEL: 'kernel_workitem_id_x'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
 ;
 ; SIZE-LABEL: 'kernel_workitem_id_x'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
 ;
@@ -36,11 +36,11 @@ define amdgpu_kernel void @kernel_workitem_id_x(ptr 
addrspace(1) %ptr) {
 
 define i32 @workitem_id_y() {
 ; ALL-LABEL: 'workitem_id_y'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
 ;
 ; SIZE-LABEL: 'workitem_id_y'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y()
 ; SIZE-NEXT:  C

[llvm-branch-commits] [llvm] AMDGPU: Add baseline cost model tests for special argument intrinsics (PR #141947)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141947

>From 5179de7f8a6024d25b5a89deadec96894749287e Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 15:29:57 +0200
Subject: [PATCH] AMDGPU: Add baseline cost model tests for special argument
 intrinsics

---
 .../AMDGPU/special-argument-intrinsics.ll | 202 ++
 1 file changed, 202 insertions(+)
 create mode 100644 
llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll

diff --git a/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
new file mode 100644
index 0000000000000..ea045e04310be
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/special-argument-intrinsics.ll
@@ -0,0 +1,202 @@
+; NOTE: Assertions have been autogenerated by 
utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,UNPACKEDID 
%s
+; RUN: opt -passes='print<cost-model>' 2>&1 -disable-output 
-mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 < %s | FileCheck 
-check-prefixes=ALL,PACKEDID %s
+
+; RUN: opt -passes='print<cost-model>' -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck 
-check-prefixes=SIZE,SIZE-UNPACKEDID %s
+; RUN: opt -passes='print<cost-model>' -cost-kind=code-size 2>&1 
-disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx942 < %s | FileCheck 
-check-prefixes=SIZE,SIZE-PACKEDID %s
+
+define i32 @workitem_id_x() {
+; ALL-LABEL: 'workitem_id_x'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
+;
+; SIZE-LABEL: 'workitem_id_x'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
i32 %result
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.x()
+  ret i32 %result
+}
+
+define amdgpu_kernel void @kernel_workitem_id_x(ptr addrspace(1) %ptr) {
+; ALL-LABEL: 'kernel_workitem_id_x'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.x()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
+;
+; SIZE-LABEL: 'kernel_workitem_id_x'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.x()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.x()
+  store i32 %result, ptr addrspace(1) %ptr
+  ret void
+}
+
+define i32 @workitem_id_y() {
+; ALL-LABEL: 'workitem_id_y'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
+;
+; SIZE-LABEL: 'workitem_id_y'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
i32 %result
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.y()
+  ret i32 %result
+}
+
+define amdgpu_kernel void @kernel_workitem_id_y(ptr addrspace(1) %ptr) {
+; ALL-LABEL: 'kernel_workitem_id_y'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
+;
+; SIZE-LABEL: 'kernel_workitem_id_y'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y()
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store 
i32 %result, ptr addrspace(1) %ptr, align 4
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
+;
+  %result = call i32 @llvm.amdgcn.workitem.id.y()
+  store i32 %result, ptr addrspace(1) %ptr
+  ret void
+}
+
+define i32 @workitem_id_z() {
+; ALL-LABEL: 'workitem_id_z'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %result 
= call i32 @llvm.amdgcn.workitem.id.y()
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
i32 %result
+;
+; SIZE-LABEL: 'workitem_id_z'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%result = call i32 @llvm.amdgcn.workitem.id.y

[llvm-branch-commits] [llvm] AMDGPU: Reduce cost of f64 copysign (PR #141944)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141944

>From 641ab37922230a88206b08d07b76df77c9d82512 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 15:20:50 +0200
Subject: [PATCH] AMDGPU: Reduce cost of f64 copysign

The real implementation is 1 real instruction plus a constant
materialization. Call that a cost of 1; it's not a real f64 operation.
---
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  | 12 ---
 .../Analysis/CostModel/AMDGPU/copysign.ll | 32 +--
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index b2b25ac66677e..b79c9be3eac93 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -718,9 +718,6 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
 
   MVT::SimpleValueType SLT = LT.second.getScalarType().SimpleTy;
 
-  if (SLT == MVT::f64)
-return LT.first * NElts * get64BitInstrCost(CostKind);
-
   if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
@@ -731,6 +728,11 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   switch (ICA.getID()) {
   case Intrinsic::fma:
   case Intrinsic::fmuladd:
+if (SLT == MVT::f64) {
+  InstRate = get64BitInstrCost(CostKind);
+  break;
+}
+
 if ((SLT == MVT::f32 && ST->hasFastFMAF32()) || SLT == MVT::f16)
   InstRate = getFullRateInstrCost();
 else {
@@ -741,8 +743,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   case Intrinsic::copysign:
 return NElts * getFullRateInstrCost();
   case Intrinsic::canonicalize: {
-assert(SLT != MVT::f64);
-InstRate = getFullRateInstrCost();
+InstRate =
+SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost();
 break;
   }
   case Intrinsic::uadd_sat:
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
index 334bb341a3c3e..5b042a8a04603 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/copysign.ll
@@ -245,25 +245,25 @@ define void @copysign_bf16() {
 
 define void @copysign_f64() {
 ; ALL-LABEL: 'copysign_f64'
-; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = 
call double @llvm.copysign.f64(double undef, double undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 
= call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 
= call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 
= call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f64 
= call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v8f64 
= call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: 
%v9f64 = call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x 
double> undef)
-; ALL-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: 
%v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x 
double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = 
call double @llvm.copysign.f64(double undef, double undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 
= call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 
= call <3 x double> @llvm.copysign.v3f64(<3 x double> undef, <3 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 
= call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f64 
= call <5 x double> @llvm.copysign.v5f64(<5 x double> undef, <5 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f64 
= call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v9f64 
= call <9 x double> @llvm.copysign.v9f64(<9 x double> undef, <9 x double> undef)
+; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%v16f64 = call <16 x double> @llvm.copysign.v16f64(<16 x double> undef, <16 x 
double> undef

[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141945

>From 99162375bef4a757fc95bfba805c559b7b13fbfe Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 11:16:22 +0200
Subject: [PATCH] AMDGPU: Move fpenvIEEEMode into TTI

---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp | 28 ++-
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  | 17 +++
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h |  7 +
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 9be8821d5bf96..d12170a60905b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -60,28 +60,6 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const 
APFloat &Src1,
   return maxnum(Src0, Src1);
 }
 
-enum class KnownIEEEMode { Unknown, On, Off };
-
-/// Return KnownIEEEMode::On if we know if the use context can assume
-/// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
-/// "amdgpu-ieee"="false".
-static KnownIEEEMode fpenvIEEEMode(const Instruction &I,
-   const GCNSubtarget &ST) {
-  if (!ST.hasIEEEMode()) // Only mode on gfx12
-return KnownIEEEMode::On;
-
-  const Function *F = I.getFunction();
-  if (!F)
-return KnownIEEEMode::Unknown;
-
-  Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");
-  if (IEEEAttr.isValid())
-return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off;
-
-  return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off
-   : KnownIEEEMode::On;
-}
-
 // Check if a value can be converted to a 16-bit value without losing
 // precision.
 // The value is expected to be either a float (IsFloat = true) or an unsigned
@@ -1004,7 +982,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
 // TODO: Also can fold to 2 operands with infinities.
 if ((match(Src0, m_APFloat(ConstSrc0)) && ConstSrc0->isNaN()) ||
 isa(Src0)) {
-  switch (fpenvIEEEMode(II, *ST)) {
+  switch (fpenvIEEEMode(II)) {
   case KnownIEEEMode::On:
 // TODO: If Src2 is snan, does it need quieting?
 if (ConstSrc0 && ConstSrc0->isSignaling())
@@ -1019,7 +997,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   }
 } else if ((match(Src1, m_APFloat(ConstSrc1)) && ConstSrc1->isNaN()) ||
isa(Src1)) {
-  switch (fpenvIEEEMode(II, *ST)) {
+  switch (fpenvIEEEMode(II)) {
   case KnownIEEEMode::On:
 // TODO: If Src2 is snan, does it need quieting?
 if (ConstSrc1 && ConstSrc1->isSignaling())
@@ -1035,7 +1013,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, 
IntrinsicInst &II) const {
   }
 } else if ((match(Src2, m_APFloat(ConstSrc2)) && ConstSrc2->isNaN()) ||
isa(Src2)) {
-  switch (fpenvIEEEMode(II, *ST)) {
+  switch (fpenvIEEEMode(II)) {
   case KnownIEEEMode::On:
 if (ConstSrc2 && ConstSrc2->isSignaling()) {
   auto *Quieted = ConstantFP::get(II.getType(), 
ConstSrc2->makeQuiet());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index b79c9be3eac93..ce2098a3a19bb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1445,3 +1445,20 @@ void GCNTTIImpl::collectKernelLaunchBounds(
   LB.push_back({"amdgpu-waves-per-eu[0]", WavesPerEU.first});
   LB.push_back({"amdgpu-waves-per-eu[1]", WavesPerEU.second});
 }
+
+GCNTTIImpl::KnownIEEEMode
+GCNTTIImpl::fpenvIEEEMode(const Instruction &I) const {
+  if (!ST->hasIEEEMode()) // Only mode on gfx12
+return KnownIEEEMode::On;
+
+  const Function *F = I.getFunction();
+  if (!F)
+return KnownIEEEMode::Unknown;
+
+  Attribute IEEEAttr = F->getFnAttribute("amdgpu-ieee");
+  if (IEEEAttr.isValid())
+return IEEEAttr.getValueAsBool() ? KnownIEEEMode::On : KnownIEEEMode::Off;
+
+  return AMDGPU::isShader(F->getCallingConv()) ? KnownIEEEMode::Off
+   : KnownIEEEMode::On;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index ec298c7e9631a..0fae301abf532 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -281,6 +281,13 @@ class GCNTTIImpl final : public 
BasicTTIImplBase {
   void collectKernelLaunchBounds(
   const Function &F,
   SmallVectorImpl> &LB) const override;
+
+  enum class KnownIEEEMode { Unknown, On, Off };
+
+  /// Return KnownIEEEMode::On if we know if the use context can assume
+  /// "amdgpu-ieee"="true" and KnownIEEEMode::Off if we can assume
+  /// "amdgpu-ieee"="false".
+

[llvm-branch-commits] [llvm] AMDGPU: Fix cost model for 16-bit operations on gfx8 (PR #141943)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/141943

>From 7fbe4e233098676cc2af8aaad48a1eb5f8cb360f Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 29 May 2025 14:41:33 +0200
Subject: [PATCH] AMDGPU: Fix cost model for 16-bit operations on gfx8

We should only divide the number of pieces to fit the packed instructions
if we actually have pk instructions. This increases the cost of copysign,
but is closer to the current codegen output. It could be much cheaper
than it is now.
---
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp  |  2 +-
 .../Analysis/CostModel/AMDGPU/canonicalize.ll | 24 
 .../Analysis/CostModel/AMDGPU/copysign.ll | 28 +--
 .../SLPVectorizer/AMDGPU/slp-v2f16.ll | 12 
 4 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 58bfc0b80b24f..b2b25ac66677e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -721,7 +721,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
   if (SLT == MVT::f64)
 return LT.first * NElts * get64BitInstrCost(CostKind);
 
-  if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
+  if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
   (ST->hasPackedFP32Ops() && SLT == MVT::f32))
 NElts = (NElts + 1) / 2;
 
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll 
b/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll
index e162edbf611e2..7ac4db3119210 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/canonicalize.ll
@@ -22,12 +22,12 @@ define void @canonicalize_f16() {
 ;
 ; GFX8-LABEL: 'canonicalize_f16'
 ; GFX8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = 
call half @llvm.canonicalize.f16(half undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 
= call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 
= call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 
= call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 
= call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: 
%v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef)
-; GFX8-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: 
%v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 
= call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 
= call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 
= call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 
= call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef)
+; GFX8-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: 
%v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef)
 ; GFX8-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret 
void
 ;
 ; GFX9-LABEL: 'canonicalize_f16'
@@ -62,12 +62,12 @@ define void @canonicalize_f16() {
 ;
 ; GFX8-SIZE-LABEL: 'canonicalize_f16'
 ; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%f16 = call half @llvm.canonicalize.f16(half undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%v2f16 = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%v3f16 = call <3 x half> @llvm.canonicalize.v3f16(<3 x half> undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%v4f16 = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: 
%v5f16 = call <5 x half> @llvm.canonicalize.v5f16(<5 x half> undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: 
%v16f16 = call <16 x half> @llvm.canonicalize.v16f16(<16 x half> undef)
-; GFX8-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: 
%v17f16 = call <17 x half> @llvm.canonicalize.v17f16(<17 x half> undef)
+; GFX8-SIZE-NEXT:

[llvm-branch-commits] [lld] [llvm] release/20.x: [lld][WebAssembly] Support for the custom-page-sizes WebAssembly proposal (#128942) (PR #129762)

2025-06-17 Thread via llvm-branch-commits

BilelGho wrote:

What is the status of this? Is there any workaround to compile to wasm with 
memories smaller than 64KiB for embedded systems for demonstration purposes? 

https://github.com/llvm/llvm-project/pull/129762
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [Driver] Forward sysroot from Driver to linker in BareMetal ToolChain Object (PR #132808)

2025-06-17 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/132808

>From 8b9b09ac86bd86f0151688d2fb699354a7857468 Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 24 Mar 2025 07:04:59 -0700
Subject: [PATCH] [Driver] Forward sysroot from Driver to linker in BareMetal
 ToolChain Object

RISCVToolChain object passes `--sysroot` option from clang to gnuld. This patch
adds support for the same in the BareMetal toolchain object.

This is done as a part of the effort to merge RISCVToolchain object into
BareMetal toolchain object.

This is the 5th patch in the series of patches for merging RISCVToolchain object
into BareMetal toolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: Ie830bf6d126fea46dc225e5ef97e14349765ba07
---
 clang/lib/Driver/ToolChains/BareMetal.cpp |  3 +
 clang/test/Driver/aarch64-toolchain.c |  5 +-
 clang/test/Driver/arm-toolchain.c |  3 +
 clang/test/Driver/baremetal.cpp   | 96 +--
 4 files changed, 82 insertions(+), 25 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index d4e4e6d04b417..84d9747488060 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -568,6 +568,9 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
   const llvm::Triple::ArchType Arch = TC.getArch();
   const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
 
+  if (!D.SysRoot.empty())
+CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
+
   CmdArgs.push_back("-Bstatic");
 
   if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax))
diff --git a/clang/test/Driver/aarch64-toolchain.c 
b/clang/test/Driver/aarch64-toolchain.c
index e12107fa2c506..d6628aa3e2e36 100644
--- a/clang/test/Driver/aarch64-toolchain.c
+++ b/clang/test/Driver/aarch64-toolchain.c
@@ -29,6 +29,7 @@
 // C-AARCH64-BAREMETAL: "-isysroot" 
"{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // C-AARCH64-BAREMETAL: "-internal-isystem" 
"{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
 // C-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
+// C-AARCH64-BAREMETAL: 
"--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // C-AARCH64-BAREMETAL: "-Bstatic" "-EL"
 // C-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o"
 // C-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
@@ -65,6 +66,7 @@
 // CXX-AARCH64-BAREMETAL: "-internal-isystem" 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/8.2.1"
 // CXX-AARCH64-BAREMETAL: "-internal-isystem" 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
 // CXX-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
+// CXX-AARCH64-BAREMETAL: 
"--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // CXX-AARCH64-BAREMETAL: "-Bstatic" "-EL"
 // CXX-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o"
 // CXX-AARCH64-BAREMETAL: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
@@ -101,7 +103,8 @@
 // CXX-AARCH64-BAREMETAL-LIBCXX: "-isysroot" 
"{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include/c++/v1"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
-// CXX-AARCH64-BAREMETAL-LIBCXX: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld
+// CXX-AARCH64-BAREMETAL-LIBCXX: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
+// CXX-AARCH64-BAREMETAL-LIBCXX: 
"--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "-Bstatic" "-EL"
 // CXX-AARCH64-BAREMETAL-LIBCXX: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o"
 // CXX-AARCH64-BAREMETAL-LIBCXX: 
"{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
diff --git a/clang/test/Driver/arm-toolchain.c 
b/clang/test/Driver/arm-toolchain.c
index d4f9bf2aaf3d5..3e507be44a8dd 100644
--- a/clang/test/Driver/arm-toolchain.c
+++ b/clang/test/Driver/arm-toolchain.c
@@ -28,6 +28,7 @@
 // C-ARM-BAREMETAL: "-isysroot" 
"{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi"
 // C-ARM-BAREMETAL: "-internal-isystem" 
"{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include"
 // C-ARM-BAREMETAL: 
"{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld"
+// C-ARM-BAREMETAL: 
"--sysroot={{.*}}/Inputs/basic_arm_

[llvm-branch-commits] [clang] [RISCV][Driver] Add support for `-m` flag to linker job of Baremetal toolchain (PR #134442)

2025-06-17 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/134442

>From 43f577397566b022866c4acbb409b860473649b8 Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Fri, 4 Apr 2025 12:51:19 -0700
Subject: [PATCH] [RISCV][Driver] Add support for `-m` flag to linker job of
 Baremetal toolchain.

Change-Id: Ifce8a3a7f1df9c12561d35ca3c923595e3619428
---
 clang/include/clang/Driver/CommonArgs.h|  2 +
 clang/lib/Driver/ToolChains/BareMetal.cpp  | 15 -
 clang/lib/Driver/ToolChains/CommonArgs.cpp | 70 ++
 clang/lib/Driver/ToolChains/Gnu.cpp| 70 --
 clang/test/Driver/aarch64-toolchain.c  | 14 ++---
 clang/test/Driver/arm-toolchain.c  | 14 ++---
 clang/test/Driver/baremetal.cpp| 51 
 7 files changed, 125 insertions(+), 111 deletions(-)

diff --git a/clang/include/clang/Driver/CommonArgs.h 
b/clang/include/clang/Driver/CommonArgs.h
index ddb21c1e8a8b8..26aa3ccf84786 100644
--- a/clang/include/clang/Driver/CommonArgs.h
+++ b/clang/include/clang/Driver/CommonArgs.h
@@ -31,6 +31,8 @@ void AddLinkerInputs(const ToolChain &TC, const InputInfoList 
&Inputs,
  const llvm::opt::ArgList &Args,
  llvm::opt::ArgStringList &CmdArgs, const JobAction &JA);
 
+const char *getLDMOption(const llvm::Triple &T, const llvm::opt::ArgList 
&Args);
+
 void addLinkerCompressDebugSectionsOption(const ToolChain &TC,
   const llvm::opt::ArgList &Args,
   llvm::opt::ArgStringList &CmdArgs);
diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index 84d9747488060..fd36a5e7f0af0 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -573,8 +573,19 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 
   CmdArgs.push_back("-Bstatic");
 
-  if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax))
-CmdArgs.push_back("--no-relax");
+  if (const char *LDMOption = getLDMOption(TC.getTriple(), Args)) {
+CmdArgs.push_back("-m");
+CmdArgs.push_back(LDMOption);
+  } else {
+D.Diag(diag::err_target_unknown_triple) << Triple.str();
+return;
+  }
+
+  if (Triple.isRISCV()) {
+CmdArgs.push_back("-X");
+if (Args.hasArg(options::OPT_mno_relax))
+  CmdArgs.push_back("--no-relax");
+  }
 
   if (Triple.isARM() || Triple.isThumb()) {
 bool IsBigEndian = arm::isARMBigEndian(Triple, Args);
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp 
b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index d5b2c5c1e199e..b1415ba6523dd 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -527,6 +527,76 @@ void tools::AddLinkerInputs(const ToolChain &TC, const 
InputInfoList &Inputs,
   }
 }
 
+const char *tools::getLDMOption(const llvm::Triple &T, const ArgList &Args) {
+  switch (T.getArch()) {
+  case llvm::Triple::x86:
+if (T.isOSIAMCU())
+  return "elf_iamcu";
+return "elf_i386";
+  case llvm::Triple::aarch64:
+return "aarch64linux";
+  case llvm::Triple::aarch64_be:
+return "aarch64linuxb";
+  case llvm::Triple::arm:
+  case llvm::Triple::thumb:
+  case llvm::Triple::armeb:
+  case llvm::Triple::thumbeb:
+return tools::arm::isARMBigEndian(T, Args) ? "armelfb_linux_eabi"
+   : "armelf_linux_eabi";
+  case llvm::Triple::m68k:
+return "m68kelf";
+  case llvm::Triple::ppc:
+if (T.isOSLinux())
+  return "elf32ppclinux";
+return "elf32ppc";
+  case llvm::Triple::ppcle:
+if (T.isOSLinux())
+  return "elf32lppclinux";
+return "elf32lppc";
+  case llvm::Triple::ppc64:
+return "elf64ppc";
+  case llvm::Triple::ppc64le:
+return "elf64lppc";
+  case llvm::Triple::riscv32:
+return "elf32lriscv";
+  case llvm::Triple::riscv64:
+return "elf64lriscv";
+  case llvm::Triple::sparc:
+  case llvm::Triple::sparcel:
+return "elf32_sparc";
+  case llvm::Triple::sparcv9:
+return "elf64_sparc";
+  case llvm::Triple::loongarch32:
+return "elf32loongarch";
+  case llvm::Triple::loongarch64:
+return "elf64loongarch";
+  case llvm::Triple::mips:
+return "elf32btsmip";
+  case llvm::Triple::mipsel:
+return "elf32ltsmip";
+  case llvm::Triple::mips64:
+if (tools::mips::hasMipsAbiArg(Args, "n32") || T.isABIN32())
+  return "elf32btsmipn32";
+return "elf64btsmip";
+  case llvm::Triple::mips64el:
+if (tools::mips::hasMipsAbiArg(Args, "n32") || T.isABIN32())
+  return "elf32ltsmipn32";
+return "elf64ltsmip";
+  case llvm::Triple::systemz:
+return "elf64_s390";
+  case llvm::Triple::x86_64:
+if (T.isX32())
+  return "elf32_x86_64";
+return "elf_x86_64";
+  case llvm::Triple::ve:
+return "elf64ve";
+  case llvm::Triple::csky:
+return "cskyelf_linux";
+  default:
+re

[llvm-branch-commits] [clang] [Driver] Fix link order of BareMetal toolchain object (PR #132806)

2025-06-17 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/132806

>From c5642ff5874419ad9ac765631959dca4b091437c Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 24 Mar 2025 06:17:42 -0700
Subject: [PATCH] [Driver] Fix link order of BareMetal toolchain object

The linker job in BareMetal toolchain object will be used by both gnuld and lld.
However, gnuld processes the arguments in the order in which they appear on the
command line, whereas there is no such restriction with lld.

The previous order was:
LibraryPaths -> Libraries -> LTOOptions -> LinkerInputs
The new order is:
LibraryPaths -> LTOOptions -> LinkerInputs -> Libraries

LTO options need to be added before adding any linker inputs because the file
format after the compile stage during LTO is bitcode, which gnuld natively cannot
process. Hence we will need to pass the appropriate plugins before adding any
bitcode file on the command line.

Object files that are getting linked need to be passed before processing any
libraries so that gnuld can appropriately do symbol resolution for the symbols
for which no definition is provided through user code.

Similar link order is also followed by other linker jobs for gnuld such as in
gnutools::Linker in Gnu.cpp

This is the 3rd patch in the series of patches of merging RISCVToolchain into
BareMetal toolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: I0e68e403c08b5687cc3346e833981f7b9f3819c4
---
 clang/lib/Driver/ToolChains/BareMetal.cpp   | 12 ++--
 clang/test/Driver/aarch64-toolchain-extra.c |  2 +-
 clang/test/Driver/aarch64-toolchain.c   | 28 
 clang/test/Driver/arm-toolchain-extra.c |  2 +-
 clang/test/Driver/arm-toolchain.c   | 28 
 clang/test/Driver/baremetal-multilib.yaml   |  3 +-
 clang/test/Driver/baremetal-sysroot.cpp |  8 ++-
 clang/test/Driver/baremetal.cpp | 79 +
 8 files changed, 92 insertions(+), 70 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index a08bb588dd764..a665040662a3f 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -568,8 +568,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
   const llvm::Triple::ArchType Arch = TC.getArch();
   const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
 
-  AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA);
-
   CmdArgs.push_back("-Bstatic");
 
   if (TC.getTriple().isRISCV() && Args.hasArg(options::OPT_mno_relax))
@@ -619,6 +617,12 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
   for (const auto &LibPath : TC.getLibraryPaths())
 CmdArgs.push_back(Args.MakeArgString(llvm::Twine("-L", LibPath)));
 
+  if (D.isUsingLTO())
+addLTOOptions(TC, Args, CmdArgs, Output, Inputs,
+  D.getLTOMode() == LTOK_Thin);
+
+  AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA);
+
   if (TC.ShouldLinkCXXStdlib(Args)) {
 bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
!Args.hasArg(options::OPT_static);
@@ -639,10 +643,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 CmdArgs.push_back("--end-group");
   }
 
-  if (D.isUsingLTO())
-addLTOOptions(TC, Args, CmdArgs, Output, Inputs,
-  D.getLTOMode() == LTOK_Thin);
-
   if ((TC.hasValidGCCInstallation() || detectGCCToolchainAdjacent(D)) &&
   NeedCRTs)
 CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd)));
diff --git a/clang/test/Driver/aarch64-toolchain-extra.c 
b/clang/test/Driver/aarch64-toolchain-extra.c
index 2a930e35acd45..a0b5f2902962f 100644
--- a/clang/test/Driver/aarch64-toolchain-extra.c
+++ b/clang/test/Driver/aarch64-toolchain-extra.c
@@ -31,5 +31,5 @@
 // C-AARCH64-BAREMETAL-NOGCC: 
"{{.*}}/aarch64-nogcc/bin/../aarch64-none-elf/lib/crt0.o"
 // C-AARCH64-BAREMETAL-NOGCC: 
"{{.*}}/aarch64-nogcc/{{.*}}/aarch64-none-elf/lib/crtbegin.o"
 // C-AARCH64-BAREMETAL-NOGCC: 
"{{.*}}/aarch64-nogcc/bin/../aarch64-none-elf/lib"
-// C-AARCH64-BAREMETAL-NOGCC: "--start-group" "-lgcc" "--as-needed" "-lgcc_s" 
"--no-as-needed" "-lc" "-lgloss" "--end-group"
+// C-AARCH64-BAREMETAL-NOGCC: "{{.*}}.o" "--start-group" "-lgcc" "--as-needed" 
"-lgcc_s" "--no-as-needed" "-lc" "-lgloss" "--end-group"
 // C-AARCH64-BAREMETAL-NOGCC: 
"{{.*}}/aarch64-nogcc/{{.*}}/aarch64-none-elf/lib/crtend.o"
diff --git a/clang/test/Driver/aarch64-toolchain.c 
b/clang/test/Driver/aarch64-toolchain.c
index 83cd95136b158..e12107fa2c506 100644
--- a/clang/test/Driver/aarch64-toolchain.c
+++ b/clang/test/Driver/aarch64-toolchain.c
@@ -11,12 +11,12 @@
 // LLD-AARCH64-BAREMETAL: "-isysroot" 
"{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // LLD-AARCH64-BAREMETAL: "-internal-isystem" 
"{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
 // LLD-AARCH64-BA

[llvm-branch-commits] [clang] [Driver] Add option to force undefined symbols during linking in BareMetal toolchain object. (PR #132807)

2025-06-17 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/132807

>From beaabe6b3d1e8dd633aa98511ac5274271fbcc65 Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 24 Mar 2025 06:49:09 -0700
Subject: [PATCH] [Driver] Add option to force undefined symbols during linking
 in BareMetal toolchain object.

Add support for `-u` option to force undefined symbols. This option is supported
by both lld and gnuld.

This is done as a part of the effort to merge RISCVToolchain object into
BareMetal toolchain object.

This is the 4th patch in the series of patches for merging RISCVToolchain object
into BareMetal toolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: Ia6597c756923a77fd9c7cb9a6ae8e52a56f5457d
---
 clang/lib/Driver/ToolChains/BareMetal.cpp   |  5 +++--
 clang/test/Driver/baremetal-undefined-symbols.c | 14 ++
 clang/test/Driver/riscv-args.c  |  6 --
 3 files changed, 17 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-undefined-symbols.c
 delete mode 100644 clang/test/Driver/riscv-args.c

diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index a665040662a3f..d4e4e6d04b417 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -609,8 +609,9 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 }
   }
 
-  Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
-options::OPT_s, options::OPT_t, options::OPT_r});
+  Args.addAllArgs(CmdArgs,
+  {options::OPT_L, options::OPT_u, options::OPT_T_Group,
+   options::OPT_s, options::OPT_t, options::OPT_r});
 
   TC.AddFilePathLibArgs(Args, CmdArgs);
 
diff --git a/clang/test/Driver/baremetal-undefined-symbols.c 
b/clang/test/Driver/baremetal-undefined-symbols.c
new file mode 100644
index 0..bff58c7c54c33
--- /dev/null
+++ b/clang/test/Driver/baremetal-undefined-symbols.c
@@ -0,0 +1,14 @@
+// Check the arguments are correctly passed
+
+// Make sure -T is the last with gcc-toolchain option
+// RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 
-T a.lds -u foo %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-LD %s
+// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds"
+
+// TODO: Merge this test with the above in the last patch when finally 
integrating riscv
+// Make sure -T is the last with gcc-toolchain option
+// RUN: %clang -### --target=aarch64-none-elf --gcc-toolchain= -Xlinker 
--defsym=FOO=10 -T a.lds -u foo %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ARM-LD %s
+// RUN: %clang -### --target=armv6m-none-eabi --gcc-toolchain= -Xlinker 
--defsym=FOO=10 -T a.lds -u foo %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-ARM-LD %s
+// CHECK-ARM-LD: {{.*}} "-T" "a.lds" "-u" "foo" {{.*}} "--defsym=FOO=10"
diff --git a/clang/test/Driver/riscv-args.c b/clang/test/Driver/riscv-args.c
deleted file mode 100644
index cab08e5b0f811..0
--- a/clang/test/Driver/riscv-args.c
+++ /dev/null
@@ -1,6 +0,0 @@
-// Check the arguments are correctly passed
-
-// Make sure -T is the last with gcc-toolchain option
-// RUN: %clang -### --target=riscv32 --gcc-toolchain= -Xlinker --defsym=FOO=10 
-T a.lds -u foo %s 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHECK-LD %s
-// CHECK-LD: {{.*}} "--defsym=FOO=10" {{.*}} "-u" "foo" {{.*}} "-T" "a.lds"

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [RISCV] Integrate RISCV target in baremetal toolchain object and deprecate RISCVToolchain object (PR #121831)

2025-06-17 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/121831

>From ef8c6e265c24852c939d4ccf3b8bda7f9d13fff7 Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 6 Jan 2025 10:05:08 -0800
Subject: [PATCH] [RISCV] Integrate RISCV target in baremetal toolchain object
 and deprecate RISCVToolchain object

This patch:
- Adds CXXStdlib, runtimelib and unwindlib defaults for riscv target to
  BareMetal toolchain object.
- Add riscv 32 and 64-bit emulation flags to linker job of BareMetal
  toolchain.
- Removes call to RISCVToolChain object from llvm.

This PR is last patch in the series of patches of merging RISCVToolchain
object into BareMetal toolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524
Change-Id: Ic5d64a4ed3ebc58c30c12d9827e7e57a02eb13ca
---
 clang/lib/Driver/CMakeLists.txt   |   1 -
 clang/lib/Driver/Driver.cpp   |  10 +-
 clang/lib/Driver/ToolChains/BareMetal.cpp |  20 ++
 clang/lib/Driver/ToolChains/BareMetal.h   |  10 +-
 .../lib/Driver/ToolChains/RISCVToolchain.cpp  | 231 --
 clang/lib/Driver/ToolChains/RISCVToolchain.h  |  67 -
 .../test/Driver/baremetal-undefined-symbols.c |  14 +-
 clang/test/Driver/riscv32-toolchain-extra.c   |   7 +-
 clang/test/Driver/riscv32-toolchain.c |  26 +-
 clang/test/Driver/riscv64-toolchain-extra.c   |   7 +-
 clang/test/Driver/riscv64-toolchain.c |  20 +-
 11 files changed, 60 insertions(+), 353 deletions(-)
 delete mode 100644 clang/lib/Driver/ToolChains/RISCVToolchain.cpp
 delete mode 100644 clang/lib/Driver/ToolChains/RISCVToolchain.h

diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt
index 44e16edfb1ccf..862f3f686912f 100644
--- a/clang/lib/Driver/CMakeLists.txt
+++ b/clang/lib/Driver/CMakeLists.txt
@@ -75,7 +75,6 @@ add_clang_library(clangDriver
   ToolChains/OHOS.cpp
   ToolChains/OpenBSD.cpp
   ToolChains/PS4CPU.cpp
-  ToolChains/RISCVToolchain.cpp
   ToolChains/Solaris.cpp
   ToolChains/SPIRV.cpp
   ToolChains/SPIRVOpenMP.cpp
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 2f86b6633df1c..6b9e250c68805 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -42,7 +42,6 @@
 #include "ToolChains/PPCFreeBSD.h"
 #include "ToolChains/PPCLinux.h"
 #include "ToolChains/PS4CPU.h"
-#include "ToolChains/RISCVToolchain.h"
 #include "ToolChains/SPIRV.h"
 #include "ToolChains/SPIRVOpenMP.h"
 #include "ToolChains/SYCL.h"
@@ -6950,16 +6949,11 @@ const ToolChain &Driver::getToolChain(const ArgList 
&Args,
 TC = std::make_unique(*this, Target, Args);
 break;
   case llvm::Triple::msp430:
-TC =
-std::make_unique(*this, Target, Args);
+TC = std::make_unique(*this, Target, 
Args);
 break;
   case llvm::Triple::riscv32:
   case llvm::Triple::riscv64:
-if (toolchains::RISCVToolChain::hasGCCToolchain(*this, Args))
-  TC =
-  std::make_unique(*this, Target, 
Args);
-else
-  TC = std::make_unique(*this, Target, Args);
+TC = std::make_unique(*this, Target, Args);
 break;
   case llvm::Triple::ve:
 TC = std::make_unique(*this, Target, Args);
diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index fd36a5e7f0af0..3b9dd13012e68 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -375,6 +375,26 @@ BareMetal::OrderedMultilibs 
BareMetal::getOrderedMultilibs() const {
   return llvm::reverse(Default);
 }
 
+ToolChain::CXXStdlibType BareMetal::GetDefaultCXXStdlibType() const {
+  if (getTriple().isRISCV() && IsGCCInstallationValid)
+return ToolChain::CST_Libstdcxx;
+  return ToolChain::CST_Libcxx;
+}
+
+ToolChain::RuntimeLibType BareMetal::GetDefaultRuntimeLibType() const {
+  if (getTriple().isRISCV() && IsGCCInstallationValid)
+return ToolChain::RLT_Libgcc;
+  return ToolChain::RLT_CompilerRT;
+}
+
+ToolChain::UnwindLibType
+BareMetal::GetUnwindLibType(const llvm::opt::ArgList &Args) const {
+  if (getTriple().isRISCV())
+return ToolChain::UNW_None;
+
+  return ToolChain::GetUnwindLibType(Args);
+}
+
 void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
   ArgStringList &CC1Args) const {
   if (DriverArgs.hasArg(options::OPT_nostdinc))
diff --git a/clang/lib/Driver/ToolChains/BareMetal.h 
b/clang/lib/Driver/ToolChains/BareMetal.h
index 54805530bae82..cc57fa21867a2 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.h
+++ b/clang/lib/Driver/ToolChains/BareMetal.h
@@ -56,13 +56,11 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public 
Generic_ELF {
 return UnwindTableLevel::None;
   }
 
-  RuntimeLibType GetDefaultRuntimeLibType() const override {
-return ToolChain::RLT_CompilerRT;
-  }
+  CXXStdlibType GetDefaultCXXStdlibType() const override;
 

[llvm-branch-commits] [llvm] AMDGPU: Handle folding vector splats of inline split f64 inline immediates (PR #140878)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/140878

>From 5d5a54cff58c1096973d3a9c28f728ca0afe3889 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 19 May 2025 21:51:06 +0200
Subject: [PATCH] AMDGPU: Handle folding vector splats of inline split f64
 inline immediates

Recognize a reg_sequence with 32-bit elements that produce a 64-bit
splat value. This enables folding f64 constants into mfma operands
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 103 --
 .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll |  41 +--
 2 files changed, 76 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp 
b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 565abad9b0366..8121098a97bd1 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -227,12 +227,12 @@ class SIFoldOperandsImpl {
   getRegSeqInit(SmallVectorImpl> &Defs,
 Register UseReg) const;
 
-  std::pair
+  std::pair
   isRegSeqSplat(MachineInstr &RegSeg) const;
 
-  MachineOperand *tryFoldRegSeqSplat(MachineInstr *UseMI, unsigned UseOpIdx,
- MachineOperand *SplatVal,
- const TargetRegisterClass *SplatRC) const;
+  bool tryFoldRegSeqSplat(MachineInstr *UseMI, unsigned UseOpIdx,
+  int64_t SplatVal,
+  const TargetRegisterClass *SplatRC) const;
 
   bool tryToFoldACImm(const FoldableDef &OpToFold, MachineInstr *UseMI,
   unsigned UseOpIdx,
@@ -966,15 +966,15 @@ const TargetRegisterClass 
*SIFoldOperandsImpl::getRegSeqInit(
   return getRegSeqInit(*Def, Defs);
 }
 
-std::pair
+std::pair
 SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) const {
   SmallVector, 32> Defs;
   const TargetRegisterClass *SrcRC = getRegSeqInit(RegSeq, Defs);
   if (!SrcRC)
 return {};
 
-  // TODO: Recognize 64-bit splats broken into 32-bit pieces (i.e. recognize
-  // every other other element is 0 for 64-bit immediates)
+  bool TryToMatchSplat64 = false;
+
   int64_t Imm;
   for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
 const MachineOperand *Op = Defs[I].first;
@@ -986,38 +986,75 @@ SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) 
const {
   Imm = SubImm;
   continue;
 }
-if (Imm != SubImm)
+
+if (Imm != SubImm) {
+  if (I == 1 && (E & 1) == 0) {
+// If we have an even number of inputs, there's a chance this is a
+// 64-bit element splat broken into 32-bit pieces.
+TryToMatchSplat64 = true;
+break;
+  }
+
   return {}; // Can only fold splat constants
+}
+  }
+
+  if (!TryToMatchSplat64)
+return {Defs[0].first->getImm(), SrcRC};
+
+  // Fallback to recognizing 64-bit splats broken into 32-bit pieces
+  // (i.e. recognize every other other element is 0 for 64-bit immediates)
+  int64_t SplatVal64;
+  for (unsigned I = 0, E = Defs.size(); I != E; I += 2) {
+const MachineOperand *Op0 = Defs[I].first;
+const MachineOperand *Op1 = Defs[I + 1].first;
+
+if (!Op0->isImm() || !Op1->isImm())
+  return {};
+
+unsigned SubReg0 = Defs[I].second;
+unsigned SubReg1 = Defs[I + 1].second;
+
+// Assume we're going to generally encounter reg_sequences with sorted
+// subreg indexes, so reject any that aren't consecutive.
+if (TRI->getChannelFromSubReg(SubReg0) + 1 !=
+TRI->getChannelFromSubReg(SubReg1))
+  return {};
+
+int64_t MergedVal = Make_64(Op1->getImm(), Op0->getImm());
+if (I == 0)
+  SplatVal64 = MergedVal;
+else if (SplatVal64 != MergedVal)
+  return {};
   }
 
-  return {Defs[0].first, SrcRC};
+  const TargetRegisterClass *RC64 = TRI->getSubRegisterClass(
+  MRI->getRegClass(RegSeq.getOperand(0).getReg()), AMDGPU::sub0_sub1);
+
+  return {SplatVal64, RC64};
 }
 
-MachineOperand *SIFoldOperandsImpl::tryFoldRegSeqSplat(
-MachineInstr *UseMI, unsigned UseOpIdx, MachineOperand *SplatVal,
+bool SIFoldOperandsImpl::tryFoldRegSeqSplat(
+MachineInstr *UseMI, unsigned UseOpIdx, int64_t SplatVal,
 const TargetRegisterClass *SplatRC) const {
   const MCInstrDesc &Desc = UseMI->getDesc();
   if (UseOpIdx >= Desc.getNumOperands())
-return nullptr;
+return false;
 
   // Filter out unhandled pseudos.
   if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
-return nullptr;
+return false;
 
   int16_t RCID = Desc.operands()[UseOpIdx].RegClass;
   if (RCID == -1)
-return nullptr;
+return false;
+
+  const TargetRegisterClass *OpRC = TRI->getRegClass(RCID);
 
   // Special case 0/-1, since when interpreted as a 64-bit element both halves
-  // have the same bits. Effectively this code does not handle 64-bit element
-  // operands correctly, as the incoming 64-bit constants are already split 
into
-  // 32-bit sequence elements.
-  //
-  // TODO: We should try to figure out how to interpret the reg_sequence as a
-  /

[llvm-branch-commits] [llvm] AMDGPU: Handle folding vector splats of inline split f64 inline immediates (PR #140878)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/140878

>From 5d5a54cff58c1096973d3a9c28f728ca0afe3889 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 19 May 2025 21:51:06 +0200
Subject: [PATCH] AMDGPU: Handle folding vector splats of inline split f64
 inline immediates

Recognize a reg_sequence with 32-bit elements that produce a 64-bit
splat value. This enables folding f64 constants into mfma operands
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 103 --
 .../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll |  41 +--
 2 files changed, 76 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp 
b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 565abad9b0366..8121098a97bd1 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -227,12 +227,12 @@ class SIFoldOperandsImpl {
   getRegSeqInit(SmallVectorImpl> &Defs,
 Register UseReg) const;
 
-  std::pair
+  std::pair
   isRegSeqSplat(MachineInstr &RegSeg) const;
 
-  MachineOperand *tryFoldRegSeqSplat(MachineInstr *UseMI, unsigned UseOpIdx,
- MachineOperand *SplatVal,
- const TargetRegisterClass *SplatRC) const;
+  bool tryFoldRegSeqSplat(MachineInstr *UseMI, unsigned UseOpIdx,
+  int64_t SplatVal,
+  const TargetRegisterClass *SplatRC) const;
 
   bool tryToFoldACImm(const FoldableDef &OpToFold, MachineInstr *UseMI,
   unsigned UseOpIdx,
@@ -966,15 +966,15 @@ const TargetRegisterClass 
*SIFoldOperandsImpl::getRegSeqInit(
   return getRegSeqInit(*Def, Defs);
 }
 
-std::pair
+std::pair
 SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) const {
   SmallVector, 32> Defs;
   const TargetRegisterClass *SrcRC = getRegSeqInit(RegSeq, Defs);
   if (!SrcRC)
 return {};
 
-  // TODO: Recognize 64-bit splats broken into 32-bit pieces (i.e. recognize
-  // every other other element is 0 for 64-bit immediates)
+  bool TryToMatchSplat64 = false;
+
   int64_t Imm;
   for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
 const MachineOperand *Op = Defs[I].first;
@@ -986,38 +986,75 @@ SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) 
const {
   Imm = SubImm;
   continue;
 }
-if (Imm != SubImm)
+
+if (Imm != SubImm) {
+  if (I == 1 && (E & 1) == 0) {
+// If we have an even number of inputs, there's a chance this is a
+// 64-bit element splat broken into 32-bit pieces.
+TryToMatchSplat64 = true;
+break;
+  }
+
   return {}; // Can only fold splat constants
+}
+  }
+
+  if (!TryToMatchSplat64)
+return {Defs[0].first->getImm(), SrcRC};
+
+  // Fallback to recognizing 64-bit splats broken into 32-bit pieces
+  // (i.e. recognize every other other element is 0 for 64-bit immediates)
+  int64_t SplatVal64;
+  for (unsigned I = 0, E = Defs.size(); I != E; I += 2) {
+const MachineOperand *Op0 = Defs[I].first;
+const MachineOperand *Op1 = Defs[I + 1].first;
+
+if (!Op0->isImm() || !Op1->isImm())
+  return {};
+
+unsigned SubReg0 = Defs[I].second;
+unsigned SubReg1 = Defs[I + 1].second;
+
+// Assume we're going to generally encounter reg_sequences with sorted
+// subreg indexes, so reject any that aren't consecutive.
+if (TRI->getChannelFromSubReg(SubReg0) + 1 !=
+TRI->getChannelFromSubReg(SubReg1))
+  return {};
+
+int64_t MergedVal = Make_64(Op1->getImm(), Op0->getImm());
+if (I == 0)
+  SplatVal64 = MergedVal;
+else if (SplatVal64 != MergedVal)
+  return {};
   }
 
-  return {Defs[0].first, SrcRC};
+  const TargetRegisterClass *RC64 = TRI->getSubRegisterClass(
+  MRI->getRegClass(RegSeq.getOperand(0).getReg()), AMDGPU::sub0_sub1);
+
+  return {SplatVal64, RC64};
 }
 
-MachineOperand *SIFoldOperandsImpl::tryFoldRegSeqSplat(
-MachineInstr *UseMI, unsigned UseOpIdx, MachineOperand *SplatVal,
+bool SIFoldOperandsImpl::tryFoldRegSeqSplat(
+MachineInstr *UseMI, unsigned UseOpIdx, int64_t SplatVal,
 const TargetRegisterClass *SplatRC) const {
   const MCInstrDesc &Desc = UseMI->getDesc();
   if (UseOpIdx >= Desc.getNumOperands())
-return nullptr;
+return false;
 
   // Filter out unhandled pseudos.
   if (!AMDGPU::isSISrcOperand(Desc, UseOpIdx))
-return nullptr;
+return false;
 
   int16_t RCID = Desc.operands()[UseOpIdx].RegClass;
   if (RCID == -1)
-return nullptr;
+return false;
+
+  const TargetRegisterClass *OpRC = TRI->getRegClass(RCID);
 
   // Special case 0/-1, since when interpreted as a 64-bit element both halves
-  // have the same bits. Effectively this code does not handle 64-bit element
-  // operands correctly, as the incoming 64-bit constants are already split 
into
-  // 32-bit sequence elements.
-  //
-  // TODO: We should try to figure out how to interpret the reg_sequence as a
-  /

[llvm-branch-commits] [llvm] AMDGPU: Reduce cost of f64 copysign (PR #141944)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jun 17, 10:54 PM UTC**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/141944).


https://github.com/llvm/llvm-project/pull/141944
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add baseline cost model tests for special argument intrinsics (PR #141947)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jun 17, 10:54 PM UTC**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/141947).


https://github.com/llvm/llvm-project/pull/141947
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Cost model for minimumnum/maximumnum (PR #141946)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jun 17, 10:54 PM UTC**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/141946).


https://github.com/llvm/llvm-project/pull/141946
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Report special input intrinsics as free (PR #141948)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jun 17, 10:54 PM UTC**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/141948).


https://github.com/llvm/llvm-project/pull/141948
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jun 17, 10:54 PM UTC**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/141945).


https://github.com/llvm/llvm-project/pull/141945
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Fix cost model for 16-bit operations on gfx8 (PR #141943)

2025-06-17 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Jun 17, 10:54 PM UTC**: A user started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/141943).


https://github.com/llvm/llvm-project/pull/141943
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [SPARC][IAS] Add definitions for cryptographic instructions (PR #139451)

2025-06-17 Thread Brad Smith via llvm-branch-commits

brad0 wrote:

@s-barannikov 

https://github.com/llvm/llvm-project/pull/139451
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Improve error accumulation in root signature parsing (PR #144465)

2025-06-17 Thread Finn Plummer via llvm-branch-commits




inbelic wrote:

I guess the awkward part is that it might be nice to clean up all the current 
error tests that are spread across many files to just one for each param type.

https://github.com/llvm/llvm-project/pull/144465
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Improve error accumulation in root signature parsing (PR #144465)

2025-06-17 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic edited 
https://github.com/llvm/llvm-project/pull/144465
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] 93b0627 - Revert "[libc++] Remove trailing newline from _LIBCPP_ASSERTION_HANDLER calls…"

2025-06-17 Thread via llvm-branch-commits

Author: Daniel Thornburgh
Date: 2025-06-17T15:50:32-07:00
New Revision: 93b062772fa316407d77eb2f71fb622568bbd669

URL: 
https://github.com/llvm/llvm-project/commit/93b062772fa316407d77eb2f71fb622568bbd669
DIFF: 
https://github.com/llvm/llvm-project/commit/93b062772fa316407d77eb2f71fb622568bbd669.diff

LOG: Revert "[libc++] Remove trailing newline from _LIBCPP_ASSERTION_HANDLER 
calls…"

This reverts commit a5a0d880736f5dc6a566374bc3b3ca0d86901510.

Added: 


Modified: 
libcxx/include/__assert
libcxx/src/verbose_abort.cpp
libcxx/test/support/check_assertion.h

Removed: 




diff  --git a/libcxx/include/__assert b/libcxx/include/__assert
index 1bfed2890b79f..90eaa6023587b 100644
--- a/libcxx/include/__assert
+++ b/libcxx/include/__assert
@@ -20,8 +20,8 @@
 #define _LIBCPP_ASSERT(expression, message)
\
   (__builtin_expect(static_cast(expression), 1)  
\
? (void)0   
\
-   : _LIBCPP_ASSERTION_HANDLER(
\
- __FILE__ ":" _LIBCPP_TOSTRING(__LINE__) ": assertion " 
_LIBCPP_TOSTRING(expression) " failed: " message))
+   : _LIBCPP_ASSERTION_HANDLER(__FILE__ ":" _LIBCPP_TOSTRING(__LINE__) ": 
assertion " _LIBCPP_TOSTRING(\
+ expression) " failed: " message "\n"))
 
 // WARNING: __builtin_assume can currently inhibit optimizations. Only add 
assumptions with a clear
 // optimization intent. See 
https://discourse.llvm.org/t/llvm-assume-blocks-optimization/71609 for a

diff  --git a/libcxx/src/verbose_abort.cpp b/libcxx/src/verbose_abort.cpp
index efb7b9be6f61c..94bdb451dee7a 100644
--- a/libcxx/src/verbose_abort.cpp
+++ b/libcxx/src/verbose_abort.cpp
@@ -30,9 +30,6 @@ _LIBCPP_WEAK void __libcpp_verbose_abort(char const* format, 
...) noexcept {
 va_list list;
 va_start(list, format);
 std::vfprintf(stderr, format, list);
-// Callers of `__libcpp_verbose_abort` do not include a newline but when
-// writing the message to stderr we need to include one.
-std::fputc('\n', stderr);
 va_end(list);
   }
 

diff  --git a/libcxx/test/support/check_assertion.h 
b/libcxx/test/support/check_assertion.h
index ea04944ea9326..a279400d651b4 100644
--- a/libcxx/test/support/check_assertion.h
+++ b/libcxx/test/support/check_assertion.h
@@ -340,7 +340,7 @@ void std::__libcpp_verbose_abort(char const* format, ...) 
noexcept {
 
   std::fprintf(stderr, "%s\n", Marker);
   std::vfprintf(stderr, format, args);
-  std::fprintf(stderr, "\n%s", Marker);
+  std::fprintf(stderr, "%s", Marker);
 
   va_end(args);
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [Driver] Add option to force undefined symbols during linking in BareMetal toolchain object. (PR #132807)

2025-06-17 Thread Garvit Gupta via llvm-branch-commits

https://github.com/quic-garvgupt updated 
https://github.com/llvm/llvm-project/pull/132807

>From c490d1f4f1d1c3a65ad722d46b87fe49ade11c21 Mon Sep 17 00:00:00 2001
From: Garvit Gupta 
Date: Mon, 24 Mar 2025 04:58:57 -0700
Subject: [PATCH 1/3] [Driver] Add support for crtbegin.o, crtend.o and
 libgloss lib to BareMetal toolchain object

This patch conditionalises the addition of crt{begin,end}.o object files along
with addition of -lgloss lib based on whether libc selected is newlib or llvm
libc. Since there is no way a user can specify which libc it wants to link
against, currently passing valid GCCInstallation to driver will select newlib
otherwise it will default to llvm libc.

Moreover, this patch makes gnuld the default linker for baremetal toolchain
object. Users need to pass `-fuse-ld=lld` explicitly to the driver to select lld.

This is the 2nd patch in the series of patches of merging RISCVToolchain into
BareMetal toolchain object.

RFC:
https://discourse.llvm.org/t/merging-riscvtoolchain-and-baremetal-toolchains/75524

Change-Id: Ie06dc976c306cf04ec2733bbb2d271c57d201f86
---
 clang/lib/Driver/ToolChains/BareMetal.cpp   | 37 +++-
 clang/lib/Driver/ToolChains/BareMetal.h |  3 +-
 clang/test/Driver/aarch64-toolchain-extra.c | 13 ++-
 clang/test/Driver/aarch64-toolchain.c   | 95 
 clang/test/Driver/arm-toolchain-extra.c |  7 ++
 clang/test/Driver/arm-toolchain.c   | 99 -
 clang/test/Driver/baremetal.cpp |  3 +-
 clang/test/Driver/sanitizer-ld.c|  2 +-
 8 files changed, 246 insertions(+), 13 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp 
b/clang/lib/Driver/ToolChains/BareMetal.cpp
index 0fbfe6c77f342..a08bb588dd764 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -584,9 +584,31 @@ void baremetal::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL");
   }
 
-  if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
-   options::OPT_r)) {
-CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o")));
+  bool NeedCRTs =
+  !Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles);
+
+  const char *CRTBegin, *CRTEnd;
+  if (NeedCRTs) {
+if (!Args.hasArg(options::OPT_r))
+  CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt0.o")));
+if (TC.hasValidGCCInstallation() || detectGCCToolchainAdjacent(D)) {
+  auto RuntimeLib = TC.GetRuntimeLibType(Args);
+  switch (RuntimeLib) {
+  case (ToolChain::RLT_Libgcc): {
+CRTBegin = "crtbegin.o";
+CRTEnd = "crtend.o";
+break;
+  }
+  case (ToolChain::RLT_CompilerRT): {
+CRTBegin =
+TC.getCompilerRTArgString(Args, "crtbegin", ToolChain::FT_Object);
+CRTEnd =
+TC.getCompilerRTArgString(Args, "crtend", ToolChain::FT_Object);
+break;
+  }
+  }
+  CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTBegin)));
+}
   }
 
   Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
@@ -609,15 +631,22 @@ void baremetal::Linker::ConstructJob(Compilation &C, 
const JobAction &JA,
   }
 
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+CmdArgs.push_back("--start-group");
 AddRunTimeLibs(TC, D, CmdArgs, Args);
-
 CmdArgs.push_back("-lc");
+if (TC.hasValidGCCInstallation() || detectGCCToolchainAdjacent(D))
+  CmdArgs.push_back("-lgloss");
+CmdArgs.push_back("--end-group");
   }
 
   if (D.isUsingLTO())
 addLTOOptions(TC, Args, CmdArgs, Output, Inputs,
   D.getLTOMode() == LTOK_Thin);
 
+  if ((TC.hasValidGCCInstallation() || detectGCCToolchainAdjacent(D)) &&
+  NeedCRTs)
+CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd)));
+
   if (TC.getTriple().isRISCV())
 CmdArgs.push_back("-X");
 
diff --git a/clang/lib/Driver/ToolChains/BareMetal.h 
b/clang/lib/Driver/ToolChains/BareMetal.h
index 930f8584e6435..54805530bae82 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.h
+++ b/clang/lib/Driver/ToolChains/BareMetal.h
@@ -38,6 +38,7 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF {
 public:
   bool initGCCInstallation(const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
+  bool hasValidGCCInstallation() const { return IsGCCInstallationValid; }
   bool isBareMetal() const override { return true; }
   bool isCrossCompiling() const override { return true; }
   bool HasNativeLLVMSupport() const override { return true; }
@@ -63,8 +64,6 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF {
 return ToolChain::CST_Libcxx;
   }
 
-  const char *getDefaultLinker() const override { return "ld.lld"; }
-
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
 llvm::opt::ArgStringList &CC1Ar

[llvm-branch-commits] [llvm] [IR2Vec] Simplifying creation of Embedder (PR #143999)

2025-06-17 Thread S. VenkataKeerthy via llvm-branch-commits

https://github.com/svkeerthy updated 
https://github.com/llvm/llvm-project/pull/143999

>From ea224dfb11b37573f5dbdd34ca118fee5a9808c1 Mon Sep 17 00:00:00 2001
From: svkeerthy 
Date: Thu, 12 Jun 2025 23:54:10 +
Subject: [PATCH] Simplifying creation of Embedder

---
 llvm/docs/MLGO.rst|  7 +--
 llvm/include/llvm/Analysis/IR2Vec.h   |  4 +-
 .../Analysis/FunctionPropertiesAnalysis.cpp   | 10 ++---
 llvm/lib/Analysis/IR2Vec.cpp  | 17 +++
 .../FunctionPropertiesAnalysisTest.cpp|  7 ++-
 llvm/unittests/Analysis/IR2VecTest.cpp| 44 +++
 6 files changed, 33 insertions(+), 56 deletions(-)

diff --git a/llvm/docs/MLGO.rst b/llvm/docs/MLGO.rst
index 28095447f6a5a..0b849f3382f63 100644
--- a/llvm/docs/MLGO.rst
+++ b/llvm/docs/MLGO.rst
@@ -482,14 +482,9 @@ embeddings can be computed and accessed via an 
``ir2vec::Embedder`` instance.
 
   // Assuming F is an llvm::Function&
   // For example, using IR2VecKind::Symbolic:
-  Expected> EmbOrErr =
+  std::unique_ptr Emb =
   ir2vec::Embedder::create(IR2VecKind::Symbolic, F, Vocabulary);
 
-  if (auto Err = EmbOrErr.takeError()) {
-// Handle error in embedder creation
-return;
-  }
-  std::unique_ptr Emb = std::move(*EmbOrErr);
 
 3. **Compute and Access Embeddings**:
Call ``getFunctionVector()`` to get the embedding for the function. 
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h 
b/llvm/include/llvm/Analysis/IR2Vec.h
index 2a7a6edda70a8..06312562060aa 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -170,8 +170,8 @@ class Embedder {
   virtual ~Embedder() = default;
 
   /// Factory method to create an Embedder object.
-  static Expected>
-  create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary);
+  static std::unique_ptr create(IR2VecKind Mode, const Function &F,
+  const Vocab &Vocabulary);
 
   /// Returns a map containing instructions and the corresponding embeddings 
for
   /// the function F if it has been computed. If not, it computes the 
embeddings
diff --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp 
b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
index 29d3aaf46dc06..dd4eb7f0df053 100644
--- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -204,16 +204,12 @@ void FunctionPropertiesInfo::updateForBB(const BasicBlock 
&BB,
 // We instantiate the IR2Vec embedder each time, as having an unique
 // pointer to the embedder as member of the class would make it
 // non-copyable. Instantiating the embedder in itself is not costly.
-auto EmbOrErr = ir2vec::Embedder::create(IR2VecKind::Symbolic,
+auto Embedder = ir2vec::Embedder::create(IR2VecKind::Symbolic,
  *BB.getParent(), *IR2VecVocab);
-if (Error Err = EmbOrErr.takeError()) {
-  handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
-BB.getContext().emitError("Error creating IR2Vec embeddings: " +
-  EI.message());
-  });
+if (!Embedder) {
+  BB.getContext().emitError("Error creating IR2Vec embeddings");
   return;
 }
-auto Embedder = std::move(*EmbOrErr);
 const auto &BBEmbedding = Embedder->getBBVector(BB);
 // Subtract BBEmbedding from Function embedding if the direction is -1,
 // and add it if the direction is +1.
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index 7ff7acebedf4e..27cc2a4109879 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -123,13 +123,14 @@ Embedder::Embedder(const Function &F, const Vocab 
&Vocabulary)
   Dimension(Vocabulary.begin()->second.size()), OpcWeight(::OpcWeight),
   TypeWeight(::TypeWeight), ArgWeight(::ArgWeight) {}
 
-Expected>
-Embedder::create(IR2VecKind Mode, const Function &F, const Vocab &Vocabulary) {
+std::unique_ptr Embedder::create(IR2VecKind Mode, const Function &F,
+   const Vocab &Vocabulary) {
   switch (Mode) {
   case IR2VecKind::Symbolic:
 return std::make_unique(F, Vocabulary);
   }
-  return make_error("Unknown IR2VecKind", errc::invalid_argument);
+  llvm_unreachable("Unknown IR2Vec kind");
+  return nullptr;
 }
 
 // FIXME: Currently lookups are string based. Use numeric Keys
@@ -384,17 +385,13 @@ PreservedAnalyses IR2VecPrinterPass::run(Module &M,
 
   auto Vocab = IR2VecVocabResult.getVocabulary();
   for (Function &F : M) {
-Expected> EmbOrErr =
+std::unique_ptr Emb =
 Embedder::create(IR2VecKind::Symbolic, F, Vocab);
-if (auto Err = EmbOrErr.takeError()) {
-  handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
-OS << "Error creating IR2Vec embeddings: " << EI.message() << "\n";
-  });
+if (!Emb) {
+  OS << "Error creating I

[llvm-branch-commits] [llvm] [NFC] Formatting PassRegistry.def (PR #144139)

2025-06-17 Thread S. VenkataKeerthy via llvm-branch-commits

https://github.com/svkeerthy updated 
https://github.com/llvm/llvm-project/pull/144139

>From cd6a0f4fbfa87df8bed4efcdf066530523f5ec0d Mon Sep 17 00:00:00 2001
From: svkeerthy 
Date: Fri, 13 Jun 2025 18:22:10 +
Subject: [PATCH] [NFC] Formatting PassRegistry.def

---
 llvm/lib/Passes/PassRegistry.def | 40 ++--
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index f761d0dab09a8..b1570162d3434 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -63,7 +63,8 @@ MODULE_PASS("coro-early", CoroEarlyPass())
 MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
 MODULE_PASS("ctx-instr-gen",
 PGOInstrumentationGen(PGOInstrumentationType::CTXPROF))
-MODULE_PASS("ctx-prof-flatten", 
PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false))
+MODULE_PASS("ctx-prof-flatten",
+PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false))
 MODULE_PASS("ctx-prof-flatten-prethinlink",
 PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true))
 MODULE_PASS("noinline-nonprevailing", NoinlineNonPrevailing())
@@ -74,7 +75,8 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
 MODULE_PASS("dxil-upgrade", DXILUpgradePass())
 MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
 MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
-MODULE_PASS("expand-variadics", 
ExpandVariadicsPass(ExpandVariadicsMode::Disable))
+MODULE_PASS("expand-variadics",
+ExpandVariadicsPass(ExpandVariadicsMode::Disable))
 MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
 MODULE_PASS("function-import", FunctionImportPass())
 MODULE_PASS("global-merge-func", GlobalMergeFuncPass())
@@ -104,7 +106,10 @@ MODULE_PASS("lower-ifunc", LowerIFuncPass())
 MODULE_PASS("simplify-type-tests", SimplifyTypeTestsPass())
 MODULE_PASS("lowertypetests", LowerTypeTestsPass())
 MODULE_PASS("fatlto-cleanup", FatLtoCleanup())
-MODULE_PASS("pgo-force-function-attrs", PGOForceFunctionAttrsPass(PGOOpt ? 
PGOOpt->ColdOptType : PGOOptions::ColdFuncOpt::Default))
+MODULE_PASS("pgo-force-function-attrs",
+PGOForceFunctionAttrsPass(PGOOpt
+  ? PGOOpt->ColdOptType
+  : PGOOptions::ColdFuncOpt::Default))
 MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
 MODULE_PASS("memprof-module", ModuleMemProfilerPass())
 MODULE_PASS("mergefunc", MergeFunctionsPass())
@@ -178,7 +183,7 @@ MODULE_PASS_WITH_PARAMS(
 parseASanPassOptions, "kernel")
 MODULE_PASS_WITH_PARAMS(
 "cg-profile", "CGProfilePass",
-[](bool InLTOPostLink) { return CGProfilePass(InLTOPostLink);},
+[](bool InLTOPostLink) { return CGProfilePass(InLTOPostLink); },
 parseCGProfilePassOptions, "in-lto-post-link")
 MODULE_PASS_WITH_PARAMS(
 "global-merge", "GlobalMergePass",
@@ -287,7 +292,8 @@ CGSCC_PASS_WITH_PARAMS(
 FUNCTION_ANALYSIS("aa", AAManager())
 FUNCTION_ANALYSIS("access-info", LoopAccessAnalysis())
 FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis())
-FUNCTION_ANALYSIS("bb-sections-profile-reader", 
BasicBlockSectionsProfileReaderAnalysis(TM))
+FUNCTION_ANALYSIS("bb-sections-profile-reader",
+  BasicBlockSectionsProfileReaderAnalysis(TM))
 FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis())
 FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis())
 FUNCTION_ANALYSIS("cycles", CycleAnalysis())
@@ -377,7 +383,7 @@ FUNCTION_PASS("expand-large-div-rem", 
ExpandLargeDivRemPass(TM))
 FUNCTION_PASS("expand-fp", ExpandFpPass(TM))
 FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM))
 FUNCTION_PASS("extra-vector-passes",
-  ExtraFunctionPassManager())
+  ExtraFunctionPassManager())
 FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
 FUNCTION_PASS("flatten-cfg", FlattenCFGPass())
 FUNCTION_PASS("float2int", Float2IntPass())
@@ -548,8 +554,7 @@ FUNCTION_PASS_WITH_PARAMS(
 "max-iterations=N")
 FUNCTION_PASS_WITH_PARAMS(
 "lint", "LintPass",
-[](bool AbortOnError) { return LintPass(AbortOnError); },
-parseLintOptions,
+[](bool AbortOnError) { return LintPass(AbortOnError); }, parseLintOptions,
 "abort-on-error")
 FUNCTION_PASS_WITH_PARAMS(
 "loop-unroll", "LoopUnrollPass",
@@ -576,7 +581,8 @@ FUNCTION_PASS_WITH_PARAMS(
 "normalize", "IRNormalizerPass",
 [](IRNormalizerOptions Options) { return IRNormalizerPass(Options); },
 parseIRNormalizerPassOptions,
-
"no-preserve-order;preserve-order;no-rename-all;rename-all;no-fold-all;fold-all;no-reorder-operands;reorder-operands")
+"no-preserve-order;preserve-order;no-rename-all;rename-all;no-fold-all;"
+"fold-all;no-reorder-operands;reorder-operands")
 FUNCTION_PASS_WITH_PARAMS(
 "mldst-motion", "MergedLoadStoreMotionPass",
 [](MergedLoadStoreMotionOptions Opts) {
@@ -590,7 +596,7 @@ FUNCTION_PASS_WITH_PARAMS(
 },
 [](StringRe

[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)

2025-06-17 Thread Erick Velez via llvm-branch-commits

https://github.com/evelez7 edited 
https://github.com/llvm/llvm-project/pull/144430
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Improve error accumulation in root signature parsing (PR #144465)

2025-06-17 Thread via llvm-branch-commits

https://github.com/joaosaffran updated 
https://github.com/llvm/llvm-project/pull/144465

>From ef396288dc10569cb1298e11ac4c67de6a5b5e03 Mon Sep 17 00:00:00 2001
From: joaosaffran 
Date: Mon, 16 Jun 2025 21:54:47 +
Subject: [PATCH 1/2] allowing multiple errors

---
 llvm/lib/Target/DirectX/DXILRootSignature.cpp | 250 ++
 1 file changed, 141 insertions(+), 109 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp 
b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index 57d5ee8ac467c..a09398864259a 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -141,14 +141,14 @@ static bool parseRootFlags(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
 
   if (RootFlagNode->getNumOperands() != 2)
 return reportError(Ctx, "Invalid format for RootFlag Element");
-
+  bool HasError = false;
   if (std::optional Val = extractMdIntValue(RootFlagNode, 1))
 RSD.Flags = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootFlagNode",
-   RootFlagNode, 1);
+HasError = HasError || reportInvalidTypeError(
+   Ctx, "RootFlagNode", RootFlagNode, 1);
 
-  return false;
+  return HasError;
 }
 
 static bool parseRootConstants(LLVMContext *Ctx, mcdxbc::RootSignatureDesc 
&RSD,
@@ -157,6 +157,7 @@ static bool parseRootConstants(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (RootConstantNode->getNumOperands() != 5)
 return reportError(Ctx, "Invalid format for RootConstants Element");
 
+  bool HasError = false;
   dxbc::RTS0::v1::RootParameterHeader Header;
   // The parameter offset doesn't matter here - we recalculate it during
   // serialization  Header.ParameterOffset = 0;
@@ -166,31 +167,31 @@ static bool parseRootConstants(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (std::optional Val = extractMdIntValue(RootConstantNode, 1))
 Header.ShaderVisibility = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 1);
+HasError = HasError || reportInvalidTypeError(
+   Ctx, "RootConstantNode", RootConstantNode, 1);
 
   dxbc::RTS0::v1::RootConstants Constants;
   if (std::optional Val = extractMdIntValue(RootConstantNode, 2))
 Constants.ShaderRegister = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 2);
+HasError = HasError || reportInvalidTypeError(
+   Ctx, "RootConstantNode", RootConstantNode, 2);
 
   if (std::optional Val = extractMdIntValue(RootConstantNode, 3))
 Constants.RegisterSpace = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 3);
+HasError = HasError || reportInvalidTypeError(
+   Ctx, "RootConstantNode", RootConstantNode, 3);
 
   if (std::optional Val = extractMdIntValue(RootConstantNode, 4))
 Constants.Num32BitValues = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 4);
-
-  RSD.ParametersContainer.addParameter(Header, Constants);
+HasError = HasError || reportInvalidTypeError(
+   Ctx, "RootConstantNode", RootConstantNode, 4);
+  if (!HasError)
+RSD.ParametersContainer.addParameter(Header, Constants);
 
-  return false;
+  return HasError;
 }
 
 static bool parseRootDescriptors(LLVMContext *Ctx,
@@ -205,6 +206,7 @@ static bool parseRootDescriptors(LLVMContext *Ctx,
   if (RootDescriptorNode->getNumOperands() != 5)
 return reportError(Ctx, "Invalid format for Root Descriptor Element");
 
+  bool HasError = false;
   dxbc::RTS0::v1::RootParameterHeader Header;
   switch (ElementKind) {
   case RootSignatureElementKind::SRV:
@@ -224,36 +226,41 @@ static bool parseRootDescriptors(LLVMContext *Ctx,
   if (std::optional Val = extractMdIntValue(RootDescriptorNode, 1))
 Header.ShaderVisibility = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootDescriptorNode",
-   RootDescriptorNode, 1);
+HasError = HasError ||
+   reportInvalidTypeError(Ctx, "RootDescriptorNode",
+   RootDescriptorNode, 1);
 
   dxbc::RTS0::v2::RootDescriptor Descriptor;
   if (std::optional Val = extractMdIntValue(RootDescriptorNode, 2))
 Descriptor.ShaderRegister = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootDescriptorNode",
-   RootDescriptorNode, 2);
+HasError = HasError ||
+   reportInvalidTypeError(Ctx, "RootDescriptorNode",
+   RootDescriptorNode, 2);
 
   if (std::optional Val = extrac

[llvm-branch-commits] [llvm] [Remarks] Auto-detect remark parser format (PR #144554)

2025-06-17 Thread Tobias Stadler via llvm-branch-commits

https://github.com/tobias-stadler updated 
https://github.com/llvm/llvm-project/pull/144554

>From a428e237fcc52830549144bf3afdcddb29742b0d Mon Sep 17 00:00:00 2001
From: Tobias Stadler 
Date: Mon, 16 Jun 2025 15:32:15 +0100
Subject: [PATCH 1/2] [Remarks] Auto-detect remark parser format

Add the remark format 'Auto', which automatically detects the remark
format from the magic number at the beginning of a remark file.

The RemarkLinker already did something similar, so we streamlined that
logic and exposed it to llvm-remarkutil.

Depends on #144527
---
 llvm/include/llvm/Remarks/RemarkFormat.h  |  5 -
 llvm/include/llvm/Remarks/RemarkLinker.h  |  5 ++---
 llvm/lib/Remarks/RemarkFormat.cpp | 18 +++-
 llvm/lib/Remarks/RemarkLinker.cpp | 14 +++--
 llvm/lib/Remarks/RemarkParser.cpp | 21 +--
 llvm/lib/Remarks/RemarkSerializer.cpp |  6 --
 .../Inputs/broken-remark-magic.bitstream  |  1 +
 .../llvm-remarkutil/annotation-count.test |  2 ++
 .../broken-bitstream-remark-magic.test|  6 ++
 .../tools/llvm-remarkutil/empty-file.test |  5 +
 .../llvm-remarkutil/instruction-count.test|  4 +++-
 .../llvm-remarkutil/instruction-mix.test  |  4 +++-
 .../size-diff/no-difference.test  |  3 +++
 .../tools/llvm-remarkutil/RemarkUtilHelpers.h |  9 +---
 llvm/unittests/Remarks/RemarksLinkingTest.cpp |  4 +---
 15 files changed, 75 insertions(+), 32 deletions(-)
 create mode 100644 
llvm/test/tools/llvm-remarkutil/Inputs/broken-remark-magic.bitstream
 create mode 100644 
llvm/test/tools/llvm-remarkutil/broken-bitstream-remark-magic.test

diff --git a/llvm/include/llvm/Remarks/RemarkFormat.h 
b/llvm/include/llvm/Remarks/RemarkFormat.h
index a39a013dcf905..eda201d4ee6f1 100644
--- a/llvm/include/llvm/Remarks/RemarkFormat.h
+++ b/llvm/include/llvm/Remarks/RemarkFormat.h
@@ -23,7 +23,7 @@ namespace remarks {
 constexpr StringLiteral Magic("REMARKS");
 
 /// The format used for serializing/deserializing remarks.
-enum class Format { Unknown, YAML, Bitstream };
+enum class Format { Unknown, Auto, YAML, Bitstream };
 
 /// Parse and validate a string for the remark format.
 LLVM_ABI Expected parseFormat(StringRef FormatStr);
@@ -31,6 +31,9 @@ LLVM_ABI Expected parseFormat(StringRef FormatStr);
 /// Parse and validate a magic number to a remark format.
 LLVM_ABI Expected magicToFormat(StringRef Magic);
 
+/// Detect format based on selected format and magic number
+LLVM_ABI Expected detectFormat(Format Selected, StringRef Magic);
+
 } // end namespace remarks
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/Remarks/RemarkLinker.h 
b/llvm/include/llvm/Remarks/RemarkLinker.h
index 5343c62144708..67208f40592a5 100644
--- a/llvm/include/llvm/Remarks/RemarkLinker.h
+++ b/llvm/include/llvm/Remarks/RemarkLinker.h
@@ -80,13 +80,12 @@ struct RemarkLinker {
   /// \p Buffer.
   /// \p Buffer can be either a standalone remark container or just
   /// metadata. This takes care of uniquing and merging the remarks.
-  LLVM_ABI Error link(StringRef Buffer,
-  std::optional RemarkFormat = std::nullopt);
+  LLVM_ABI Error link(StringRef Buffer, Format RemarkFormat = Format::Auto);
 
   /// Link the remarks found in \p Obj by looking for the right section and
   /// calling the method above.
   LLVM_ABI Error link(const object::ObjectFile &Obj,
-  std::optional RemarkFormat = std::nullopt);
+  Format RemarkFormat = Format::Auto);
 
   /// Serialize the linked remarks to the stream \p OS, using the format \p
   /// RemarkFormat.
diff --git a/llvm/lib/Remarks/RemarkFormat.cpp 
b/llvm/lib/Remarks/RemarkFormat.cpp
index 800f5bffe70da..1c52e352f9392 100644
--- a/llvm/lib/Remarks/RemarkFormat.cpp
+++ b/llvm/lib/Remarks/RemarkFormat.cpp
@@ -42,6 +42,22 @@ Expected llvm::remarks::magicToFormat(StringRef 
MagicStr) {
 
   if (Result == Format::Unknown)
 return createStringError(std::make_error_code(std::errc::invalid_argument),
- "Unknown remark magic: '%s'", MagicStr.data());
+ "Automatic detection of remark format failed. "
+ "Unknown magic number: '%.4s'",
+ MagicStr.data());
   return Result;
 }
+
+Expected llvm::remarks::detectFormat(Format Selected,
+ StringRef MagicStr) {
+  if (Selected == Format::Unknown)
+return createStringError(std::make_error_code(std::errc::invalid_argument),
+ "Unknown remark parser format.");
+  if (Selected != Format::Auto)
+return Selected;
+
+  // Empty files are valid bitstream files
+  if (MagicStr.empty())
+return Format::Bitstream;
+  return magicToFormat(MagicStr);
+}
diff --git a/llvm/lib/Remarks/RemarkLinker.cpp 
b/llvm/lib/Remarks/RemarkLinker.cpp
index b8395aa135d82..0ca6217edfddd 100644
--- a/llvm/lib

[llvm-branch-commits] [llvm] [DirectX] Improve error accumulation in root signature parsing (PR #144465)

2025-06-17 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff HEAD~1 HEAD --extensions cpp -- llvm/lib/Target/DirectX/DXILRootSignature.cpp
```





View the diff from clang-format here.


```diff
diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp 
b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index e96c680c6..eea46e714 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -145,8 +145,9 @@ static bool parseRootFlags(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (std::optional Val = extractMdIntValue(RootFlagNode, 1))
 RSD.Flags = *Val;
   else
-HasError = reportInvalidTypeError(
-   Ctx, "RootFlagNode", RootFlagNode, 1) || 
HasError;
+HasError = reportInvalidTypeError(Ctx, "RootFlagNode",
+   RootFlagNode, 1) ||
+   HasError;
 
   return HasError;
 }
@@ -167,27 +168,31 @@ static bool parseRootConstants(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (std::optional Val = extractMdIntValue(RootConstantNode, 1))
 Header.ShaderVisibility = *Val;
   else
-HasError = reportInvalidTypeError(
-   Ctx, "RootConstantNode", RootConstantNode, 1) 
|| HasError;
+HasError = reportInvalidTypeError(Ctx, "RootConstantNode",
+   RootConstantNode, 1) ||
+   HasError;
 
   dxbc::RTS0::v1::RootConstants Constants;
   if (std::optional Val = extractMdIntValue(RootConstantNode, 2))
 Constants.ShaderRegister = *Val;
   else
-HasError = reportInvalidTypeError (
-   Ctx, "RootConstantNode", RootConstantNode, 2) 
|| HasError;
+HasError = reportInvalidTypeError(Ctx, "RootConstantNode",
+   RootConstantNode, 2) ||
+   HasError;
 
   if (std::optional Val = extractMdIntValue(RootConstantNode, 3))
 Constants.RegisterSpace = *Val;
   else
-HasError = reportInvalidTypeError (
-   Ctx, "RootConstantNode", RootConstantNode, 3) 
|| HasError;
+HasError = reportInvalidTypeError(Ctx, "RootConstantNode",
+   RootConstantNode, 3) ||
+   HasError;
 
   if (std::optional Val = extractMdIntValue(RootConstantNode, 4))
 Constants.Num32BitValues = *Val;
   else
-HasError = reportInvalidTypeError (
-   Ctx, "RootConstantNode", RootConstantNode, 4) 
|| HasError;
+HasError = reportInvalidTypeError(Ctx, "RootConstantNode",
+   RootConstantNode, 4) ||
+   HasError;
   if (!HasError)
 RSD.ParametersContainer.addParameter(Header, Constants);
 
@@ -226,24 +231,24 @@ static bool parseRootDescriptors(LLVMContext *Ctx,
   if (std::optional Val = extractMdIntValue(RootDescriptorNode, 1))
 Header.ShaderVisibility = *Val;
   else
-HasError =
-   reportInvalidTypeError(Ctx, "RootDescriptorNode",
-   RootDescriptorNode, 1)|| 
HasError;
+HasError = reportInvalidTypeError(Ctx, "RootDescriptorNode",
+   RootDescriptorNode, 1) ||
+   HasError;
 
   dxbc::RTS0::v2::RootDescriptor Descriptor;
   if (std::optional Val = extractMdIntValue(RootDescriptorNode, 2))
 Descriptor.ShaderRegister = *Val;
   else
-HasError =
-   reportInvalidTypeError(Ctx, "RootDescriptorNode",
-   RootDescriptorNode, 2)|| 
HasError;
+HasError = reportInvalidTypeError(Ctx, "RootDescriptorNode",
+   RootDescriptorNode, 2) ||
+   HasError;
 
   if (std::optional Val = extractMdIntValue(RootDescriptorNode, 3))
 Descriptor.RegisterSpace = *Val;
   else
-HasError =
-   reportInvalidTypeError(Ctx, "RootDescriptorNode",
-   RootDescriptorNode, 3)|| 
HasError;
+HasError = reportInvalidTypeError(Ctx, "RootDescriptorNode",
+   RootDescriptorNode, 3) ||
+   HasError;
 
   if (RSD.Version == 1) {
 if (!HasError)
@@ -255,9 +260,9 @@ static bool parseRootDescriptors(LLVMContext *Ctx,
   if (std::optional Val = extractMdIntValue(RootDescriptorNode, 4))
 Descriptor.Flags = *Val;
   else
-HasError =
-   reportInvalidTypeError(Ctx, "RootDescriptorNode",
-   RootDescriptorNode, 4)|| 
HasError;
+HasError = reportInvalidTypeError(Ctx, "RootDescriptorNode",
+   RootDes

[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)

2025-06-17 Thread Erick Velez via llvm-branch-commits

https://github.com/evelez7 updated 
https://github.com/llvm/llvm-project/pull/144430

>From 72e4a2441b93546c3e275078d1525ae01e73e361 Mon Sep 17 00:00:00 2001
From: Erick Velez 
Date: Mon, 16 Jun 2025 10:50:35 -0700
Subject: [PATCH] remove extraneous change

---
 clang-tools-extra/clang-doc/BitcodeReader.cpp |  72 +++
 clang-tools-extra/clang-doc/BitcodeWriter.cpp |  44 ++-
 clang-tools-extra/clang-doc/BitcodeWriter.h   |  12 +-
 clang-tools-extra/clang-doc/HTMLGenerator.cpp |   4 +
 .../clang-doc/HTMLMustacheGenerator.cpp   |   2 +
 clang-tools-extra/clang-doc/JSONGenerator.cpp |  46 +++
 clang-tools-extra/clang-doc/MDGenerator.cpp   |   5 +
 clang-tools-extra/clang-doc/Mapper.cpp|   4 +
 clang-tools-extra/clang-doc/Mapper.h  |   1 +
 .../clang-doc/Representation.cpp  |  13 ++
 clang-tools-extra/clang-doc/Representation.h  |  26 +++-
 clang-tools-extra/clang-doc/Serialize.cpp |  90 +
 clang-tools-extra/clang-doc/Serialize.h   |   4 +
 clang-tools-extra/clang-doc/YAMLGenerator.cpp |   2 +
 .../test/clang-doc/json/class-requires.cpp|  18 +--
 .../clang-doc/json/compound-constraints.cpp   | 121 ++
 .../test/clang-doc/json/concept.cpp   |  48 +++
 .../test/clang-doc/json/function-requires.cpp |  36 +++---
 .../unittests/clang-doc/BitcodeTest.cpp   |   2 +
 19 files changed, 494 insertions(+), 56 deletions(-)
 create mode 100644 
clang-tools-extra/test/clang-doc/json/compound-constraints.cpp

diff --git a/clang-tools-extra/clang-doc/BitcodeReader.cpp 
b/clang-tools-extra/clang-doc/BitcodeReader.cpp
index 35058abab0663..5b70280e7dba8 100644
--- a/clang-tools-extra/clang-doc/BitcodeReader.cpp
+++ b/clang-tools-extra/clang-doc/BitcodeReader.cpp
@@ -92,6 +92,7 @@ static llvm::Error decodeRecord(const Record &R, InfoType 
&Field,
   case InfoType::IT_default:
   case InfoType::IT_enum:
   case InfoType::IT_typedef:
+  case InfoType::IT_concept:
 Field = IT;
 return llvm::Error::success();
   }
@@ -108,6 +109,7 @@ static llvm::Error decodeRecord(const Record &R, FieldId 
&Field,
   case FieldId::F_type:
   case FieldId::F_child_namespace:
   case FieldId::F_child_record:
+  case FieldId::F_concept:
   case FieldId::F_default:
 Field = F;
 return llvm::Error::success();
@@ -391,6 +393,29 @@ static llvm::Error parseRecord(const Record &R, unsigned 
ID,
  "invalid field for TemplateParamInfo");
 }
 
+static llvm::Error parseRecord(const Record &R, unsigned ID,
+   llvm::StringRef Blob, ConceptInfo *I) {
+  switch (ID) {
+  case CONCEPT_USR:
+return decodeRecord(R, I->USR, Blob);
+  case CONCEPT_NAME:
+return decodeRecord(R, I->Name, Blob);
+  case CONCEPT_IS_TYPE:
+return decodeRecord(R, I->IsType, Blob);
+  case CONCEPT_CONSTRAINT_EXPRESSION:
+return decodeRecord(R, I->ConstraintExpression, Blob);
+  }
+  llvm_unreachable("invalid field for ConceptInfo");
+}
+
+static llvm::Error parseRecord(const Record &R, unsigned ID,
+   llvm::StringRef Blob, ConstraintInfo *I) {
+  if (ID == CONSTRAINT_EXPRESSION)
+return decodeRecord(R, I->Expression, Blob);
+  return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "invalid field for ConstraintInfo");
+}
+
 template  static llvm::Expected getCommentInfo(T I) 
{
   return llvm::createStringError(llvm::inconvertibleErrorCode(),
  "invalid type cannot contain CommentInfo");
@@ -429,6 +454,10 @@ template <> llvm::Expected 
getCommentInfo(CommentInfo *I) {
   return I->Children.back().get();
 }
 
+template <> llvm::Expected getCommentInfo(ConceptInfo *I) {
+  return &I->Description.emplace_back();
+}
+
 // When readSubBlock encounters a TypeInfo sub-block, it calls addTypeInfo on
 // the parent block to set it. The template specializations define what to do
 // for each supported parent block.
@@ -584,6 +613,18 @@ template <> llvm::Error addReference(RecordInfo *I, 
Reference &&R, FieldId F) {
   }
 }
 
+template <>
+llvm::Error addReference(ConstraintInfo *I, Reference &&R, FieldId F) {
+  switch (F) {
+  case FieldId::F_concept:
+I->ConceptRef = std::move(R);
+return llvm::Error::success();
+  default:
+return llvm::createStringError(llvm::inconvertibleErrorCode(),
+   "invalid type cannot contain Reference");
+  }
+}
+
 template 
 static void addChild(T I, ChildInfoType &&R) {
   llvm::errs() << "invalid child type for info";
@@ -600,6 +641,9 @@ template <> void addChild(NamespaceInfo *I, EnumInfo &&R) {
 template <> void addChild(NamespaceInfo *I, TypedefInfo &&R) {
   I->Children.Typedefs.emplace_back(std::move(R));
 }
+template <> void addChild(NamespaceInfo *I, ConceptInfo &&R) {
+  I->Children.Concepts.emplace_back(std::move(R));
+}
 
 // Record children:
 template <> void addChild(RecordInfo *I, FunctionInfo &&R) {
@@ -649,6 +693,9 @@ 

[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)

2025-06-17 Thread Erick Velez via llvm-branch-commits

evelez7 wrote:

The Linux CI is reported as failing, but it looks like all tests actually passed.

Added compound constraint support, which simply walks through the nested 
expressions until it reaches the constraint.

https://github.com/llvm/llvm-project/pull/144430
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Remarks] Auto-detect remark parser format (PR #144554)

2025-06-17 Thread Jon Roelofs via llvm-branch-commits

https://github.com/jroelofs approved this pull request.

Love it!

https://github.com/llvm/llvm-project/pull/144554
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Improve error handling and validation in root signature parsing (PR #144577)

2025-06-17 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-directx

Author: None (joaosaffran)


Changes

This patch enhances error handling and validation in the DirectX backend's root 
signature parsing. The changes include:

1. **Improved Error Reporting**:
   - Introduced `reportInvalidTypeError` utility to provide detailed error 
messages for type mismatches.
   - Enhanced diagnostic messages for invalid metadata nodes and values.

2. **Validation Updates**:
   - Added stricter validation for descriptor tables and static samplers.
   - Improved handling of invalid values for filter modes, address modes, and 
LOD parameters.

Example changes:
```cpp
if (Element == nullptr)
  return reportInvalidTypeError(Ctx, "DescriptorTableNode", 
DescriptorTableNode, I);

if (std::optional Val = extractMdIntValue(StaticSamplerNode, 1))
  Sampler.Filter = *Val;
else
  return reportInvalidTypeError(Ctx, "StaticSamplerNode", 
StaticSamplerNode, 1);
  ```
Testing:
- Validation of invalid metadata nodes and values.
- Proper diagnostic messages for type mismatches.
- All existing DirectX backend tests continue to pass.


---
Full diff: https://github.com/llvm/llvm-project/pull/144577.diff


4 Files Affected:

- (modified) llvm/lib/Target/DirectX/DXILRootSignature.cpp (+125-31) 
- (modified) 
llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants-Invalid-Num32BitValues.ll
 (+1-1) 
- (modified) 
llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants-Invalid-RegisterSpace.ll
 (+1-1) 
- (modified) 
llvm/test/CodeGen/DirectX/ContainerData/RootSignature-RootConstants-Invalid-ShaderRegister.ll
 (+1-1) 


```diff
diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp 
b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index 3aef7d3eb1e69..57d5ee8ac467c 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -48,6 +48,71 @@ static bool reportValueError(LLVMContext *Ctx, Twine 
ParamName,
   return true;
 }
 
+// Template function to get formatted type string based on C++ type
+template  std::string getTypeFormatted() {
+  if constexpr (std::is_same_v) {
+return "string";
+  } else if constexpr (std::is_same_v ||
+   std::is_same_v) {
+return "metadata";
+  } else if constexpr (std::is_same_v ||
+   std::is_same_v) {
+return "constant";
+  } else if constexpr (std::is_same_v) {
+return "constant";
+  } else if constexpr (std::is_same_v ||
+   std::is_same_v) {
+return "constant int";
+  } else if constexpr (std::is_same_v) {
+return "constant int";
+  }
+  return "unknown";
+}
+
+// Helper function to get the actual type of a metadata operand
+std::string getActualMDType(const MDNode *Node, unsigned Index) {
+  if (!Node || Index >= Node->getNumOperands())
+return "null";
+
+  Metadata *Op = Node->getOperand(Index);
+  if (!Op)
+return "null";
+
+  if (isa(Op))
+return getTypeFormatted();
+
+  if (isa(Op)) {
+if (auto *CAM = dyn_cast(Op)) {
+  Type *T = CAM->getValue()->getType();
+  if (T->isIntegerTy())
+return (Twine("i") + Twine(T->getIntegerBitWidth())).str();
+  if (T->isFloatingPointTy())
+return T->isFloatTy()? getTypeFormatted()
+   : T->isDoubleTy() ? getTypeFormatted()
+ : "fp";
+
+  return getTypeFormatted();
+}
+  }
+  if (isa(Op))
+return getTypeFormatted();
+
+  return "unknown";
+}
+
+// Helper function to simplify error reporting for invalid metadata values
+template 
+auto reportInvalidTypeError(LLVMContext *Ctx, Twine ParamName,
+const MDNode *Node, unsigned Index) {
+  std::string ExpectedType = getTypeFormatted();
+  std::string ActualType = getActualMDType(Node, Index);
+
+  return reportError(Ctx, "Root Signature Node: " + ParamName +
+  " expected metadata node of type " +
+  ExpectedType + " at index " + Twine(Index) +
+  " but got " + ActualType);
+}
+
 static std::optional extractMdIntValue(MDNode *Node,
  unsigned int OpId) {
   if (auto *CI =
@@ -80,7 +145,8 @@ static bool parseRootFlags(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (std::optional Val = extractMdIntValue(RootFlagNode, 1))
 RSD.Flags = *Val;
   else
-return reportError(Ctx, "Invalid value for RootFlag");
+return reportInvalidTypeError(Ctx, "RootFlagNode",
+   RootFlagNode, 1);
 
   return false;
 }
@@ -100,23 +166,27 @@ static bool parseRootConstants(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (std::optional Val = extractMdIntValue(RootConstantNode, 1))
 Header.ShaderVisibility = *Val;
   else
-return reportError(Ctx, "Invalid value for ShaderVisibility");
+return reportInvalidTypeError(Ctx, "Ro

[llvm-branch-commits] [llvm] [DirectX] Improve error handling and validation in root signature parsing (PR #144577)

2025-06-17 Thread via llvm-branch-commits

https://github.com/joaosaffran ready_for_review 
https://github.com/llvm/llvm-project/pull/144577
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Improve error accumulation in root signature parsing (PR #144465)

2025-06-17 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-directx

Author: None (joaosaffran)


Changes

This patch enhances error handling in the DirectX backend's root signature 
parsing, specifically in DXILRootSignature.cpp. The changes include:

1. Modify error handling to accumulate errors:
   - Replace early returns with error accumulation using HasError
   - Allow validation to continue after encountering an invalid type
   - Maintain original error reporting functionality while collecting multiple 
errors

2. Fix root flag parsing:
   - Use boolean accumulator for multiple validation errors
   - Improve invalid type reporting for root flag nodes
   - Maintain consistency with existing error reporting patterns

Before this change, the parser would stop at the first error encountered. Now 
it continues validation, collecting all errors before returning. This provides 
a better developer experience by showing all issues that need to be fixed at 
once.

Example of changes:
```cpp
bool HasError = false;
if (std::optional<uint32_t> Val = extractMdIntValue(RootFlagNode, 1))
  RSD.Flags = *Val;
else
  HasError = HasError || reportInvalidTypeError<ConstantInt>(
Ctx, "RootFlagNode", RootFlagNode, 1);
return HasError;
```

Testing:
- All existing DirectX backend tests pass
- Verified error accumulation with multiple validation failures
- Root signature parsing continues to work as expected

---

Patch is 25.66 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/144465.diff


2 Files Affected:

- (modified) llvm/lib/Target/DirectX/DXILRootSignature.cpp (+171-110) 
- (added) 
llvm/test/CodeGen/DirectX/ContainerData/RootSignature-Error-Accumulation.ll 
(+23) 


``diff
diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp 
b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index 57d5ee8ac467c..eea46e714b756 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -141,14 +141,15 @@ static bool parseRootFlags(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
 
   if (RootFlagNode->getNumOperands() != 2)
 return reportError(Ctx, "Invalid format for RootFlag Element");
-
+  bool HasError = false;
   if (std::optional Val = extractMdIntValue(RootFlagNode, 1))
 RSD.Flags = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootFlagNode",
-   RootFlagNode, 1);
+HasError = reportInvalidTypeError(Ctx, "RootFlagNode",
+   RootFlagNode, 1) ||
+   HasError;
 
-  return false;
+  return HasError;
 }
 
 static bool parseRootConstants(LLVMContext *Ctx, mcdxbc::RootSignatureDesc 
&RSD,
@@ -157,6 +158,7 @@ static bool parseRootConstants(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (RootConstantNode->getNumOperands() != 5)
 return reportError(Ctx, "Invalid format for RootConstants Element");
 
+  bool HasError = false;
   dxbc::RTS0::v1::RootParameterHeader Header;
   // The parameter offset doesn't matter here - we recalculate it during
   // serialization  Header.ParameterOffset = 0;
@@ -166,31 +168,35 @@ static bool parseRootConstants(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (std::optional Val = extractMdIntValue(RootConstantNode, 1))
 Header.ShaderVisibility = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 1);
+HasError = reportInvalidTypeError(Ctx, "RootConstantNode",
+   RootConstantNode, 1) ||
+   HasError;
 
   dxbc::RTS0::v1::RootConstants Constants;
   if (std::optional Val = extractMdIntValue(RootConstantNode, 2))
 Constants.ShaderRegister = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 2);
+HasError = reportInvalidTypeError(Ctx, "RootConstantNode",
+   RootConstantNode, 2) ||
+   HasError;
 
   if (std::optional Val = extractMdIntValue(RootConstantNode, 3))
 Constants.RegisterSpace = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 3);
+HasError = reportInvalidTypeError(Ctx, "RootConstantNode",
+   RootConstantNode, 3) ||
+   HasError;
 
   if (std::optional Val = extractMdIntValue(RootConstantNode, 4))
 Constants.Num32BitValues = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 4);
-
-  RSD.ParametersContainer.addParameter(Header, Constants);
+HasError = reportInvalidTypeError(Ctx, "RootConstantNode",
+   RootConstantNode, 4)

[llvm-branch-commits] [clang] [CIR] Upstream ComplexImagPtrOp for ComplexType (PR #144236)

2025-06-17 Thread Bruno Cardoso Lopes via llvm-branch-commits

https://github.com/bcardosolopes commented:

This might look a bit different given the redesign of the other pieces, so I'm 
gonna hold on reviewing this for now, let me know when this is ready again.

https://github.com/llvm/llvm-project/pull/144236
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [CIR] Upstream __real__ for ComplexType (PR #144261)

2025-06-17 Thread Bruno Cardoso Lopes via llvm-branch-commits

bcardosolopes wrote:

This might look a bit different given the redesign of the other pieces, so I'm 
gonna hold on reviewing this for now, let me know when this is ready again.

https://github.com/llvm/llvm-project/pull/144261
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [lld] [llvm] [mlir] [HLSL] Add descriptor table metadata parsing (PR #142492)

2025-06-17 Thread via llvm-branch-commits

https://github.com/joaosaffran updated 
https://github.com/llvm/llvm-project/pull/142492

error: too big or took too long to generate
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [CIR] Upstream __imag__ for ComplexType (PR #144262)

2025-06-17 Thread Bruno Cardoso Lopes via llvm-branch-commits

bcardosolopes wrote:

This might look a bit different given the redesign of the other pieces, so I'm 
gonna hold on reviewing this for now, let me know when this is ready again.

https://github.com/llvm/llvm-project/pull/144262
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)

2025-06-17 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-tools-extra

Author: Erick Velez (evelez7)


Changes



---

Patch is 40.83 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/144430.diff


19 Files Affected:

- (modified) clang-tools-extra/clang-doc/BitcodeReader.cpp (+72) 
- (modified) clang-tools-extra/clang-doc/BitcodeWriter.cpp (+41-3) 
- (modified) clang-tools-extra/clang-doc/BitcodeWriter.h (+11-1) 
- (modified) clang-tools-extra/clang-doc/HTMLGenerator.cpp (+4) 
- (modified) clang-tools-extra/clang-doc/HTMLMustacheGenerator.cpp (+2) 
- (modified) clang-tools-extra/clang-doc/JSONGenerator.cpp (+46) 
- (modified) clang-tools-extra/clang-doc/MDGenerator.cpp (+5) 
- (modified) clang-tools-extra/clang-doc/Mapper.cpp (+4) 
- (modified) clang-tools-extra/clang-doc/Mapper.h (+1) 
- (modified) clang-tools-extra/clang-doc/Representation.cpp (+13) 
- (modified) clang-tools-extra/clang-doc/Representation.h (+25-1) 
- (modified) clang-tools-extra/clang-doc/Serialize.cpp (+91-1) 
- (modified) clang-tools-extra/clang-doc/Serialize.h (+4) 
- (modified) clang-tools-extra/clang-doc/YAMLGenerator.cpp (+2) 
- (modified) clang-tools-extra/test/clang-doc/json/class-requires.cpp (+9-9) 
- (added) clang-tools-extra/test/clang-doc/json/compound-constraints.cpp (+121) 
- (modified) clang-tools-extra/test/clang-doc/json/concept.cpp (+24-24) 
- (modified) clang-tools-extra/test/clang-doc/json/function-requires.cpp 
(+18-18) 
- (modified) clang-tools-extra/unittests/clang-doc/BitcodeTest.cpp (+2) 


``diff
diff --git a/clang-tools-extra/clang-doc/BitcodeReader.cpp 
b/clang-tools-extra/clang-doc/BitcodeReader.cpp
index 35058abab0663..5b70280e7dba8 100644
--- a/clang-tools-extra/clang-doc/BitcodeReader.cpp
+++ b/clang-tools-extra/clang-doc/BitcodeReader.cpp
@@ -92,6 +92,7 @@ static llvm::Error decodeRecord(const Record &R, InfoType 
&Field,
   case InfoType::IT_default:
   case InfoType::IT_enum:
   case InfoType::IT_typedef:
+  case InfoType::IT_concept:
 Field = IT;
 return llvm::Error::success();
   }
@@ -108,6 +109,7 @@ static llvm::Error decodeRecord(const Record &R, FieldId 
&Field,
   case FieldId::F_type:
   case FieldId::F_child_namespace:
   case FieldId::F_child_record:
+  case FieldId::F_concept:
   case FieldId::F_default:
 Field = F;
 return llvm::Error::success();
@@ -391,6 +393,29 @@ static llvm::Error parseRecord(const Record &R, unsigned 
ID,
  "invalid field for TemplateParamInfo");
 }
 
+static llvm::Error parseRecord(const Record &R, unsigned ID,
+   llvm::StringRef Blob, ConceptInfo *I) {
+  switch (ID) {
+  case CONCEPT_USR:
+return decodeRecord(R, I->USR, Blob);
+  case CONCEPT_NAME:
+return decodeRecord(R, I->Name, Blob);
+  case CONCEPT_IS_TYPE:
+return decodeRecord(R, I->IsType, Blob);
+  case CONCEPT_CONSTRAINT_EXPRESSION:
+return decodeRecord(R, I->ConstraintExpression, Blob);
+  }
+  llvm_unreachable("invalid field for ConceptInfo");
+}
+
+static llvm::Error parseRecord(const Record &R, unsigned ID,
+   llvm::StringRef Blob, ConstraintInfo *I) {
+  if (ID == CONSTRAINT_EXPRESSION)
+return decodeRecord(R, I->Expression, Blob);
+  return llvm::createStringError(llvm::inconvertibleErrorCode(),
+ "invalid field for ConstraintInfo");
+}
+
 template  static llvm::Expected getCommentInfo(T I) 
{
   return llvm::createStringError(llvm::inconvertibleErrorCode(),
  "invalid type cannot contain CommentInfo");
@@ -429,6 +454,10 @@ template <> llvm::Expected 
getCommentInfo(CommentInfo *I) {
   return I->Children.back().get();
 }
 
+template <> llvm::Expected getCommentInfo(ConceptInfo *I) {
+  return &I->Description.emplace_back();
+}
+
 // When readSubBlock encounters a TypeInfo sub-block, it calls addTypeInfo on
 // the parent block to set it. The template specializations define what to do
 // for each supported parent block.
@@ -584,6 +613,18 @@ template <> llvm::Error addReference(RecordInfo *I, 
Reference &&R, FieldId F) {
   }
 }
 
+template <>
+llvm::Error addReference(ConstraintInfo *I, Reference &&R, FieldId F) {
+  switch (F) {
+  case FieldId::F_concept:
+I->ConceptRef = std::move(R);
+return llvm::Error::success();
+  default:
+return llvm::createStringError(llvm::inconvertibleErrorCode(),
+   "invalid type cannot contain Reference");
+  }
+}
+
 template 
 static void addChild(T I, ChildInfoType &&R) {
   llvm::errs() << "invalid child type for info";
@@ -600,6 +641,9 @@ template <> void addChild(NamespaceInfo *I, EnumInfo &&R) {
 template <> void addChild(NamespaceInfo *I, TypedefInfo &&R) {
   I->Children.Typedefs.emplace_back(std::move(R));
 }
+template <> void addChild(NamespaceInfo *I, ConceptInfo &&R) {
+  I->Children.Concepts.emplace_back(std::move(R));
+}
 
 // Record children:
 template <> void addChild(RecordInfo *I

[llvm-branch-commits] [BOLT] Support pre-aggregated returns (PR #143296)

2025-06-17 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/143296


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT] Support pre-aggregated returns (PR #143296)

2025-06-17 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/143296


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Improve error accumulation in root signature parsing (PR #144465)

2025-06-17 Thread via llvm-branch-commits

https://github.com/joaosaffran updated 
https://github.com/llvm/llvm-project/pull/144465

>From ef396288dc10569cb1298e11ac4c67de6a5b5e03 Mon Sep 17 00:00:00 2001
From: joaosaffran 
Date: Mon, 16 Jun 2025 21:54:47 +
Subject: [PATCH 1/3] allowing multiple errors

---
 llvm/lib/Target/DirectX/DXILRootSignature.cpp | 250 ++
 1 file changed, 141 insertions(+), 109 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp 
b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index 57d5ee8ac467c..a09398864259a 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -141,14 +141,14 @@ static bool parseRootFlags(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
 
   if (RootFlagNode->getNumOperands() != 2)
 return reportError(Ctx, "Invalid format for RootFlag Element");
-
+  bool HasError = false;
   if (std::optional Val = extractMdIntValue(RootFlagNode, 1))
 RSD.Flags = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootFlagNode",
-   RootFlagNode, 1);
+HasError = HasError || reportInvalidTypeError(
+   Ctx, "RootFlagNode", RootFlagNode, 1);
 
-  return false;
+  return HasError;
 }
 
 static bool parseRootConstants(LLVMContext *Ctx, mcdxbc::RootSignatureDesc 
&RSD,
@@ -157,6 +157,7 @@ static bool parseRootConstants(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (RootConstantNode->getNumOperands() != 5)
 return reportError(Ctx, "Invalid format for RootConstants Element");
 
+  bool HasError = false;
   dxbc::RTS0::v1::RootParameterHeader Header;
   // The parameter offset doesn't matter here - we recalculate it during
   // serialization  Header.ParameterOffset = 0;
@@ -166,31 +167,31 @@ static bool parseRootConstants(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (std::optional Val = extractMdIntValue(RootConstantNode, 1))
 Header.ShaderVisibility = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 1);
+HasError = HasError || reportInvalidTypeError(
+   Ctx, "RootConstantNode", RootConstantNode, 1);
 
   dxbc::RTS0::v1::RootConstants Constants;
   if (std::optional Val = extractMdIntValue(RootConstantNode, 2))
 Constants.ShaderRegister = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 2);
+HasError = HasError || reportInvalidTypeError(
+   Ctx, "RootConstantNode", RootConstantNode, 2);
 
   if (std::optional Val = extractMdIntValue(RootConstantNode, 3))
 Constants.RegisterSpace = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 3);
+HasError = HasError || reportInvalidTypeError(
+   Ctx, "RootConstantNode", RootConstantNode, 3);
 
   if (std::optional Val = extractMdIntValue(RootConstantNode, 4))
 Constants.Num32BitValues = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootConstantNode",
-   RootConstantNode, 4);
-
-  RSD.ParametersContainer.addParameter(Header, Constants);
+HasError = HasError || reportInvalidTypeError(
+   Ctx, "RootConstantNode", RootConstantNode, 4);
+  if (!HasError)
+RSD.ParametersContainer.addParameter(Header, Constants);
 
-  return false;
+  return HasError;
 }
 
 static bool parseRootDescriptors(LLVMContext *Ctx,
@@ -205,6 +206,7 @@ static bool parseRootDescriptors(LLVMContext *Ctx,
   if (RootDescriptorNode->getNumOperands() != 5)
 return reportError(Ctx, "Invalid format for Root Descriptor Element");
 
+  bool HasError = false;
   dxbc::RTS0::v1::RootParameterHeader Header;
   switch (ElementKind) {
   case RootSignatureElementKind::SRV:
@@ -224,36 +226,41 @@ static bool parseRootDescriptors(LLVMContext *Ctx,
   if (std::optional Val = extractMdIntValue(RootDescriptorNode, 1))
 Header.ShaderVisibility = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootDescriptorNode",
-   RootDescriptorNode, 1);
+HasError = HasError ||
+   reportInvalidTypeError(Ctx, "RootDescriptorNode",
+   RootDescriptorNode, 1);
 
   dxbc::RTS0::v2::RootDescriptor Descriptor;
   if (std::optional Val = extractMdIntValue(RootDescriptorNode, 2))
 Descriptor.ShaderRegister = *Val;
   else
-return reportInvalidTypeError(Ctx, "RootDescriptorNode",
-   RootDescriptorNode, 2);
+HasError = HasError ||
+   reportInvalidTypeError(Ctx, "RootDescriptorNode",
+   RootDescriptorNode, 2);
 
   if (std::optional Val = extrac

[llvm-branch-commits] [clang-tools-extra] [clang-doc] add support for concepts (PR #144430)

2025-06-17 Thread Erick Velez via llvm-branch-commits

https://github.com/evelez7 ready_for_review 
https://github.com/llvm/llvm-project/pull/144430
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [CI] Test all projects when CI scripts change (PR #144034)

2025-06-17 Thread via llvm-branch-commits

cmtice wrote:

This PR failed the Linux premerge test; are you investigating that?

https://github.com/llvm/llvm-project/pull/144034
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Improve error handling and validation in root signature parsing (PR #144577)

2025-06-17 Thread via llvm-branch-commits

https://github.com/joaosaffran created 
https://github.com/llvm/llvm-project/pull/144577

This patch enhances error handling and validation in the DirectX backend's root 
signature parsing. The changes include:

1. **Improved Error Reporting**:
   - Introduced `reportInvalidTypeError` utility to provide detailed error 
messages for type mismatches.
   - Enhanced diagnostic messages for invalid metadata nodes and values.

2. **Validation Updates**:
   - Added stricter validation for descriptor tables and static samplers.
   - Improved handling of invalid values for filter modes, address modes, and 
LOD parameters.

Example changes:
```cpp
if (Element == nullptr)
  return reportInvalidTypeError<MDNode>(Ctx, "DescriptorTableNode", 
DescriptorTableNode, I);

if (std::optional<uint32_t> Val = extractMdIntValue(StaticSamplerNode, 1))
  Sampler.Filter = *Val;
else
  return reportInvalidTypeError<ConstantInt>(Ctx, "StaticSamplerNode", 
StaticSamplerNode, 1);
  ```
Testing:
- Validation of invalid metadata nodes and values.
- Proper diagnostic messages for type mismatches.
- All existing DirectX backend tests continue to pass.


>From 02f1f21b8ecc608341440c573483e69c161a06d4 Mon Sep 17 00:00:00 2001
From: joaosaffran 
Date: Fri, 6 Jun 2025 20:04:00 +
Subject: [PATCH 1/2] changing error message

---
 llvm/lib/Target/DirectX/DXILRootSignature.cpp | 119 +++---
 ...re-RootConstants-Invalid-Num32BitValues.ll |   2 +-
 ...ure-RootConstants-Invalid-RegisterSpace.ll |   2 +-
 ...re-RootConstants-Invalid-ShaderRegister.ll |   2 +-
 4 files changed, 104 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp 
b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index 3aef7d3eb1e69..3a27afc6c660f 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -12,6 +12,7 @@
 
//===--===//
 #include "DXILRootSignature.h"
 #include "DirectX.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/DXILMetadataAnalysis.h"
@@ -30,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 using namespace llvm;
@@ -48,6 +50,71 @@ static bool reportValueError(LLVMContext *Ctx, Twine 
ParamName,
   return true;
 }
 
+// Template function to get formatted type string based on C++ type
+template  std::string getTypeFormatted() {
+  if constexpr (std::is_same_v) {
+return "string";
+  } else if constexpr (std::is_same_v ||
+   std::is_same_v) {
+return "metadata";
+  } else if constexpr (std::is_same_v ||
+   std::is_same_v) {
+return "constant";
+  } else if constexpr (std::is_same_v) {
+return "constant";
+  } else if constexpr (std::is_same_v ||
+   std::is_same_v) {
+return "constant int";
+  } else if constexpr (std::is_same_v) {
+return "constant int";
+  }
+  return "unknown";
+}
+
+// Helper function to get the actual type of a metadata operand
+std::string getActualMDType(const MDNode *Node, unsigned Index) {
+  if (!Node || Index >= Node->getNumOperands())
+return "null";
+
+  Metadata *Op = Node->getOperand(Index);
+  if (!Op)
+return "null";
+
+  if (isa(Op))
+return getTypeFormatted();
+
+  if (isa(Op)) {
+if (auto *CAM = dyn_cast(Op)) {
+  Type *T = CAM->getValue()->getType();
+  if (T->isIntegerTy())
+return (Twine("i") + Twine(T->getIntegerBitWidth())).str();
+  if (T->isFloatingPointTy())
+return T->isFloatTy()? getTypeFormatted()
+   : T->isDoubleTy() ? getTypeFormatted()
+ : "fp";
+
+  return getTypeFormatted();
+}
+  }
+  if (isa(Op))
+return getTypeFormatted();
+
+  return "unknown";
+}
+
+// Helper function to simplify error reporting for invalid metadata values
+template 
+auto reportInvalidTypeError(LLVMContext *Ctx, Twine ParamName,
+const MDNode *Node, unsigned Index) {
+  std::string ExpectedType = getTypeFormatted();
+  std::string ActualType = getActualMDType(Node, Index);
+
+  return reportError(Ctx, "Root Signature Node: " + ParamName +
+  " expected metadata node of type " +
+  ExpectedType + " at index " + Twine(Index) +
+  " but got " + ActualType);
+}
+
 static std::optional extractMdIntValue(MDNode *Node,
  unsigned int OpId) {
   if (auto *CI =
@@ -80,7 +147,8 @@ static bool parseRootFlags(LLVMContext *Ctx, 
mcdxbc::RootSignatureDesc &RSD,
   if (std::optional Val = extractMdIntValue(RootFlagNode, 1))
 RSD.Flags = *Val;
   else
-return reportError(Ctx, "Invalid value for RootFlag");
+return reportInvalidTypeError(Ctx, "RootFlagNode",
+   RootFlagNode, 1);
 
   return false;
 }
@@ -100,23 +168,27 

[llvm-branch-commits] [llvm] [DirectX] Improve error accumulation in root signature parsing (PR #144465)

2025-06-17 Thread via llvm-branch-commits

https://github.com/joaosaffran edited 
https://github.com/llvm/llvm-project/pull/144465
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Improve error accumulation in root signature parsing (PR #144465)

2025-06-17 Thread via llvm-branch-commits

https://github.com/joaosaffran ready_for_review 
https://github.com/llvm/llvm-project/pull/144465
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [CI] Test all projects when CI scripts change (PR #144034)

2025-06-17 Thread via llvm-branch-commits

https://github.com/cmtice approved this pull request.


https://github.com/llvm/llvm-project/pull/144034
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 27a68db - Revert "[clang] Add managarm support (#139271)"

2025-06-17 Thread via llvm-branch-commits

Author: Aaron Ballman
Date: 2025-06-17T07:46:22-04:00
New Revision: 27a68db7ce88f6f88ac6742f2d765951d7c73fa1

URL: 
https://github.com/llvm/llvm-project/commit/27a68db7ce88f6f88ac6742f2d765951d7c73fa1
DIFF: 
https://github.com/llvm/llvm-project/commit/27a68db7ce88f6f88ac6742f2d765951d7c73fa1.diff

LOG: Revert "[clang] Add managarm support (#139271)"

This reverts commit e86740e6003739a41139d94e1643a3207f8fd8f8.

Added: 


Modified: 
clang/lib/Basic/Targets.cpp
clang/lib/Basic/Targets/OSTargets.h
clang/lib/Driver/CMakeLists.txt
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/Gnu.cpp
clang/lib/Lex/InitHeaderSearch.cpp
clang/test/Preprocessor/init.c
clang/test/Preprocessor/predefined-macros-no-warnings.c

Removed: 
clang/lib/Driver/ToolChains/Managarm.cpp
clang/lib/Driver/ToolChains/Managarm.h

clang/test/Driver/Inputs/basic_managarm_tree/lib/aarch64-managarm-mlibc/.keep

clang/test/Driver/Inputs/basic_managarm_tree/lib/riscv64-managarm-mlibc/.keep
clang/test/Driver/Inputs/basic_managarm_tree/lib/x86_64-managarm-mlibc/.keep

clang/test/Driver/Inputs/basic_managarm_tree/lib64/aarch64-managarm-mlibc/.keep

clang/test/Driver/Inputs/basic_managarm_tree/lib64/riscv64-managarm-mlibc/.keep

clang/test/Driver/Inputs/basic_managarm_tree/lib64/x86_64-managarm-mlibc/.keep

clang/test/Driver/Inputs/basic_managarm_tree/usr/include/aarch64-managarm-mlibc/c++/10/.keep
clang/test/Driver/Inputs/basic_managarm_tree/usr/include/c++/10/.keep
clang/test/Driver/Inputs/basic_managarm_tree/usr/include/c++/v1/.keep

clang/test/Driver/Inputs/basic_managarm_tree/usr/include/riscv64-managarm-mlibc/c++/10/.keep

clang/test/Driver/Inputs/basic_managarm_tree/usr/include/x86_64-managarm-mlibc/c++/10/.keep

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/aarch64-managarm-mlibc/.keep

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/gcc/aarch64-managarm-mlibc/10/crtbegin.o

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/gcc/aarch64-managarm-mlibc/10/crtbeginS.o

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/gcc/aarch64-managarm-mlibc/10/crtbeginT.o

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/gcc/riscv64-managarm-mlibc/10/crtbegin.o

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/gcc/riscv64-managarm-mlibc/10/crtbeginS.o

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/gcc/riscv64-managarm-mlibc/10/crtbeginT.o

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/gcc/x86_64-managarm-mlibc/10/crtbegin.o

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/gcc/x86_64-managarm-mlibc/10/crtbeginS.o

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/gcc/x86_64-managarm-mlibc/10/crtbeginT.o

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/riscv64-managarm-mlibc/.keep

clang/test/Driver/Inputs/basic_managarm_tree/usr/lib/x86_64-managarm-mlibc/.keep
clang/test/Driver/Inputs/basic_managarm_tree/usr/lib64/.keep
clang/test/Driver/managarm.cpp



diff  --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
index afa863308..9889141ad2085 100644
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -164,9 +164,6 @@ std::unique_ptr AllocateTarget(const 
llvm::Triple &Triple,
 return std::make_unique>(Triple,
  Opts);
   }
-case llvm::Triple::Managarm:
-  return std::make_unique>(Triple,
-   Opts);
 case llvm::Triple::NetBSD:
   return std::make_unique>(Triple,
  Opts);
@@ -469,9 +466,6 @@ std::unique_ptr AllocateTarget(const 
llvm::Triple &Triple,
 return std::make_unique>(Triple,
Opts);
   }
-case llvm::Triple::Managarm:
-  return std::make_unique>(Triple,
- Opts);
 default:
   return std::make_unique(Triple, Opts);
 }
@@ -660,9 +654,6 @@ std::unique_ptr AllocateTarget(const 
llvm::Triple &Triple,
   return std::make_unique>(Triple, Opts);
 case llvm::Triple::Hurd:
   return std::make_unique>(Triple, Opts);
-case llvm::Triple::Managarm:
-  return std::make_unique>(Triple,
-Opts);
 default:
   return std::make_unique(Triple, Opts);
 }

diff  --git a/clang/lib/Basic/Targets/OSTargets.h 
b/clang/lib/Basic/Targets/OSTargets.h
index 5dac699c2bb45..d148b38d03c7c 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -395,36 +395,6 @@ class LLVM_LIBRARY_VISIBILITY LinuxTargetInfo : public 
OSTargetInfo {
   }
 };
 
-// Manag