date:20250815

[llvm-branch-commits] [llvm] [LoongArch] Reduce number of reserved relocations when relax enabled (PR #153769)

2025-08-15 Thread via llvm-branch-commits


https://github.com/zhaoqi5 updated 
https://github.com/llvm/llvm-project/pull/153769

>From f67324528d93ca3e908f39e8e89caef5ecc3e11f Mon Sep 17 00:00:00 2001
From: Qi Zhao 
Date: Fri, 15 Aug 2025 17:12:33 +0800
Subject: [PATCH 1/2] [LoongArch] Reduce number of reserved relocations when
 relax enabled

---
 .../MCTargetDesc/LoongArchAsmBackend.cpp  | 27 +++
 .../MC/LoongArch/Relocations/relax-attr.s |  7 ++---
 2 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp 
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index ca5d27d54bb81..c6eda4fd63a6b 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -143,8 +143,6 @@ static void fixupLeb128(MCContext &Ctx, const MCFixup 
&Fixup, uint8_t *Data,
 void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
  const MCValue &Target, uint8_t *Data,
  uint64_t Value, bool IsResolved) {
-  if (IsResolved && shouldForceRelocation(Fixup, Target))
-IsResolved = false;
   IsResolved = addReloc(F, Fixup, Target, Value, IsResolved);
   if (!Value)
 return; // Doesn't change encoding.
@@ -176,20 +174,6 @@ void LoongArchAsmBackend::applyFixup(const MCFragment &F, 
const MCFixup &Fixup,
   }
 }
 
-bool LoongArchAsmBackend::shouldForceRelocation(const MCFixup &Fixup,
-const MCValue &Target) {
-  switch (Fixup.getKind()) {
-  default:
-return STI.hasFeature(LoongArch::FeatureRelax);
-  case FK_Data_1:
-  case FK_Data_2:
-  case FK_Data_4:
-  case FK_Data_8:
-  case FK_Data_leb128:
-return !Target.isAbsolute();
-  }
-}
-
 static inline std::pair
 getRelocPairForSize(unsigned Size) {
   switch (Size) {
@@ -484,9 +468,16 @@ bool LoongArchAsmBackend::addReloc(const MCFragment &F, 
const MCFixup &Fixup,
 return false;
   }
 
-  IsResolved = Fallback();
   // If linker relaxation is enabled and supported by the current relocation,
-  // append a RELAX relocation.
+  // generate a relocation and then append a RELAX.
+  if (Fixup.isLinkerRelaxable())
+IsResolved = false;
+  if (IsResolved && Fixup.isPCRel())
+IsResolved = isPCRelFixupResolved(Target.getAddSym(), F);
+
+  if (!IsResolved)
+Asm->getWriter().recordRelocation(F, Fixup, Target, FixedValue);
+
   if (Fixup.isLinkerRelaxable()) {
 auto FA = MCFixup::create(Fixup.getOffset(), nullptr, ELF::R_LARCH_RELAX);
 Asm->getWriter().recordRelocation(F, FA, MCValue::get(nullptr),
diff --git a/llvm/test/MC/LoongArch/Relocations/relax-attr.s 
b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
index e3e9038b755d3..d94d32ebd7ab0 100644
--- a/llvm/test/MC/LoongArch/Relocations/relax-attr.s
+++ b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
@@ -17,11 +17,8 @@
 # CHECKR-NEXT: 0x4 R_LARCH_CALL36 foo 0x0
 # CHECKR-NEXT: 0x4 R_LARCH_RELAX - 0x0
 # CHECKR-NEXT: 0x10 R_LARCH_B21 .L0 0x0
-# CHECKR-NEXT: 0x14 R_LARCH_B21 .L1 0x0
 # CHECKR-NEXT: 0x18 R_LARCH_B16 .L0 0x0
-# CHECKR-NEXT: 0x1C R_LARCH_B16 .L1 0x0
 # CHECKR-NEXT: 0x20 R_LARCH_B26 .L0 0x0
-# CHECKR-NEXT: 0x24 R_LARCH_B26 .L1 0x0
 # CHECKR-NEXT:   }
 # CHECKR-NEXT:   Section ({{.*}}) .rela.data {
 # CHECKR-NEXT: 0x0 R_LARCH_64 .L1 0x0
@@ -36,6 +33,10 @@
 
 .L1:
   nop
+## Relocations for branches to .L0 must be reserved and be fixed up by linker
+## when linker relaxation enabled, because of the relaxable call36 instruction.
+## Branches to .L1 can be resolved correctly at compile time, so their
+## relocations can simply be removed.
   bnez $a0, .L0
   beqz $a0, .L1
   beq  $a0, $a1, .L0

>From f491f2cf66ea530ef3a5f465b87a0dad0b4c6d5e Mon Sep 17 00:00:00 2001
From: Qi Zhao 
Date: Fri, 15 Aug 2025 20:13:59 +0800
Subject: [PATCH 2/2] remove shouldForceRelocation declaration

---
 llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h 
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
index 1f1360119edba..f79d3aa48c54c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -44,8 +44,6 @@ class LoongArchAsmBackend : public MCAsmBackend {
   void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
   uint8_t *Data, uint64_t Value, bool IsResolved) override;
 
-  bool shouldForceRelocation(const MCFixup &Fixup, const MCValue &Target);
-
   std::optional getFixupKind(StringRef Name) const override;
 
   MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-llvm-binary-utilities

Author: Matt Arsenault (arsenm)


Changes

We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.

BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.

As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.

---
Full diff: https://github.com/llvm/llvm-project/pull/153801.diff


1 Files Affected:

- (modified) llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp (+32-27) 


```diff
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp 
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index 1c5f38d0c24b8..05f2512e24a50 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -289,7 +289,6 @@ class RuntimeLibcallEmitter {
 
 /// Helper struct for the name hash table.
 struct LookupEntry {
-  StringRef FuncName;
   uint64_t Hash = 0;
   unsigned TableValue = 0;
 };
@@ -339,14 +338,17 @@ static void emitHashFunction(raw_ostream &OS) {
 /// Return the table size, maximum number of collisions for the set of hashes
 static std::pair
 computePerfectHashParameters(ArrayRef Hashes) {
-  const int SizeOverhead = 10;
-  const int NumHashes = Hashes.size();
+  // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+  const int SizeOverhead = 4;
 
   // Index derived from hash -> number of collisions.
   DenseMap Table;
 
+  unsigned NumHashes = Hashes.size();
+
   for (int MaxCollisions = 1;; ++MaxCollisions) {
-for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead * 
NumHashes;
+ N <<= 1) {
   Table.clear();
 
   bool NeedResize = false;
@@ -367,22 +369,12 @@ computePerfectHashParameters(ArrayRef Hashes) {
 
 static std::vector
 constructPerfectHashTable(ArrayRef Keywords,
-  ArrayRef Hashes, int Size, int Collisions,
-  StringToOffsetTable &OffsetTable) {
-  DenseSet Seen;
+  ArrayRef Hashes,
+  ArrayRef TableValues, int Size,
+  int Collisions, StringToOffsetTable &OffsetTable) {
   std::vector Lookup(Size * Collisions);
 
-  for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-// We do not want to add repeated entries for cases with the same name, 
only
-// an entry for the first, with the name collision enum values immediately
-// following.
-if (!Seen.insert(ImplName).second)
-  continue;
-
-uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
-
+  for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
 uint64_t Idx = (HashValue % static_cast(Size)) *
static_cast(Collisions);
 
@@ -390,8 +382,7 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 for (int J = 0; J < Collisions; ++J) {
   LookupEntry &Entry = Lookup[Idx + J];
   if (Entry.TableValue == 0) {
-Entry.FuncName = ImplName;
-Entry.TableValue = LibCallImpl.getEnumVal();
+Entry.TableValue = TableValue;
 Entry.Hash = HashValue;
 Found = true;
 break;
@@ -399,7 +390,7 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 }
 
 if (!Found)
-  reportFatalInternalError("failure to hash " + ImplName);
+  reportFatalInternalError("failure to hash");
   }
 
   return Lookup;
@@ -409,15 +400,25 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 void RuntimeLibcallEmitter::emitNameMatchHashTable(
 raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
   std::vector Hashes(RuntimeLibcallImplDefList.size());
+  std::vector TableValues(RuntimeLibcallImplDefList.size());
+  DenseSet SeenFuncNames;
 
   size_t MaxFuncNameSize = 0;
   size_t Index = 0;
+
   for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
 StringRef ImplName = LibCallImpl.getLibcallFuncName();
-MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
-Hashes[Index++] = hash(ImplName);
+if (SeenFuncNames.insert(ImplName).second) {
+  MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
+  TableValues[Index] = LibCallImpl.getEnumVal();
+  Hashes[Index++] = hash(ImplName);
+}
   }
 
+  // Trim excess elements from non-unique entries.
+  Hashes.resize(SeenFuncNames.size());
+  TableValues.resize(SeenFuncNames.size());
+
   LLVM_DEBUG({
 for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
   StringRef ImplName = LibCallImpl.getLibcallFuncName();
@@ -447,8 +448,9 @

[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-tablegen

Author: Matt Arsenault (arsenm)


Changes

We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.

BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.

As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.

---
Full diff: https://github.com/llvm/llvm-project/pull/153801.diff


1 Files Affected:

- (modified) llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp (+32-27) 


```diff
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp 
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index 1c5f38d0c24b8..05f2512e24a50 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -289,7 +289,6 @@ class RuntimeLibcallEmitter {
 
 /// Helper struct for the name hash table.
 struct LookupEntry {
-  StringRef FuncName;
   uint64_t Hash = 0;
   unsigned TableValue = 0;
 };
@@ -339,14 +338,17 @@ static void emitHashFunction(raw_ostream &OS) {
 /// Return the table size, maximum number of collisions for the set of hashes
 static std::pair
 computePerfectHashParameters(ArrayRef Hashes) {
-  const int SizeOverhead = 10;
-  const int NumHashes = Hashes.size();
+  // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+  const int SizeOverhead = 4;
 
   // Index derived from hash -> number of collisions.
   DenseMap Table;
 
+  unsigned NumHashes = Hashes.size();
+
   for (int MaxCollisions = 1;; ++MaxCollisions) {
-for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead * 
NumHashes;
+ N <<= 1) {
   Table.clear();
 
   bool NeedResize = false;
@@ -367,22 +369,12 @@ computePerfectHashParameters(ArrayRef Hashes) {
 
 static std::vector
 constructPerfectHashTable(ArrayRef Keywords,
-  ArrayRef Hashes, int Size, int Collisions,
-  StringToOffsetTable &OffsetTable) {
-  DenseSet Seen;
+  ArrayRef Hashes,
+  ArrayRef TableValues, int Size,
+  int Collisions, StringToOffsetTable &OffsetTable) {
   std::vector Lookup(Size * Collisions);
 
-  for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-// We do not want to add repeated entries for cases with the same name, 
only
-// an entry for the first, with the name collision enum values immediately
-// following.
-if (!Seen.insert(ImplName).second)
-  continue;
-
-uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
-
+  for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
 uint64_t Idx = (HashValue % static_cast(Size)) *
static_cast(Collisions);
 
@@ -390,8 +382,7 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 for (int J = 0; J < Collisions; ++J) {
   LookupEntry &Entry = Lookup[Idx + J];
   if (Entry.TableValue == 0) {
-Entry.FuncName = ImplName;
-Entry.TableValue = LibCallImpl.getEnumVal();
+Entry.TableValue = TableValue;
 Entry.Hash = HashValue;
 Found = true;
 break;
@@ -399,7 +390,7 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 }
 
 if (!Found)
-  reportFatalInternalError("failure to hash " + ImplName);
+  reportFatalInternalError("failure to hash");
   }
 
   return Lookup;
@@ -409,15 +400,25 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 void RuntimeLibcallEmitter::emitNameMatchHashTable(
 raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
   std::vector Hashes(RuntimeLibcallImplDefList.size());
+  std::vector TableValues(RuntimeLibcallImplDefList.size());
+  DenseSet SeenFuncNames;
 
   size_t MaxFuncNameSize = 0;
   size_t Index = 0;
+
   for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
 StringRef ImplName = LibCallImpl.getLibcallFuncName();
-MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
-Hashes[Index++] = hash(ImplName);
+if (SeenFuncNames.insert(ImplName).second) {
+  MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
+  TableValues[Index] = LibCallImpl.getEnumVal();
+  Hashes[Index++] = hash(ImplName);
+}
   }
 
+  // Trim excess elements from non-unique entries.
+  Hashes.resize(SeenFuncNames.size());
+  TableValues.resize(SeenFuncNames.size());
+
   LLVM_DEBUG({
 for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
   StringRef ImplName = LibCallImpl.getLibcallFuncName();
@@ -447,8 +448,9 @@ void Runtim

[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)

2025-08-15 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/153801
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)

2025-08-15 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/153801

We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.

BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.

As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.

>From f445a169e99a9e603eb285ddf9bdd56df0719d2d Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 15 Aug 2025 09:45:46 +0900
Subject: [PATCH] RuntimeLibcalls: Fix building hash table with duplicate
 entries

We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.

BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.

As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.
---
 .../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 59 ++-
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp 
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index 1c5f38d0c24b8..05f2512e24a50 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -289,7 +289,6 @@ class RuntimeLibcallEmitter {
 
 /// Helper struct for the name hash table.
 struct LookupEntry {
-  StringRef FuncName;
   uint64_t Hash = 0;
   unsigned TableValue = 0;
 };
@@ -339,14 +338,17 @@ static void emitHashFunction(raw_ostream &OS) {
 /// Return the table size, maximum number of collisions for the set of hashes
 static std::pair
 computePerfectHashParameters(ArrayRef Hashes) {
-  const int SizeOverhead = 10;
-  const int NumHashes = Hashes.size();
+  // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+  const int SizeOverhead = 4;
 
   // Index derived from hash -> number of collisions.
   DenseMap Table;
 
+  unsigned NumHashes = Hashes.size();
+
   for (int MaxCollisions = 1;; ++MaxCollisions) {
-for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead * 
NumHashes;
+ N <<= 1) {
   Table.clear();
 
   bool NeedResize = false;
@@ -367,22 +369,12 @@ computePerfectHashParameters(ArrayRef Hashes) {
 
 static std::vector
 constructPerfectHashTable(ArrayRef Keywords,
-  ArrayRef Hashes, int Size, int Collisions,
-  StringToOffsetTable &OffsetTable) {
-  DenseSet Seen;
+  ArrayRef Hashes,
+  ArrayRef TableValues, int Size,
+  int Collisions, StringToOffsetTable &OffsetTable) {
   std::vector Lookup(Size * Collisions);
 
-  for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-// We do not want to add repeated entries for cases with the same name, 
only
-// an entry for the first, with the name collision enum values immediately
-// following.
-if (!Seen.insert(ImplName).second)
-  continue;
-
-uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
-
+  for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
 uint64_t Idx = (HashValue % static_cast(Size)) *
static_cast(Collisions);
 
@@ -390,8 +382,7 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 for (int J = 0; J < Collisions; ++J) {
   LookupEntry &Entry = Lookup[Idx + J];
   if (Entry.TableValue == 0) {
-Entry.FuncName = ImplName;
-Entry.TableValue = LibCallImpl.getEnumVal();
+Entry.TableValue = TableValue;
 Entry.Hash = HashValue;
 Found = true;
 break;
@@ -399,7 +390,7 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 }
 
 if (!Found)
-  reportFatalInternalError("failure to hash " + ImplName);
+  reportFatalInternalError("failure to hash");
   }
 
   return Lookup;
@@ -409,15 +400,25 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 void RuntimeLibcallEmitter::emitNameMatchHashTable(
 raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
   std::vector Hashes(RuntimeLibcallImplDefList.size());
+  std::vector TableValues(RuntimeLibcallImplDefList.size());
+  DenseSet SeenFuncNames;
 
   size_t MaxFuncNameSize = 0;
   size_t Index = 0;
+
   for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
 StringRef ImplName = LibCallImpl.getL

[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)

2025-08-15 Thread Matt Arsenault via llvm-branch-commits


arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/153801?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#153801** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/153801?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/153801?utm_source=stack-comment-view-in-graphite)
* **#153210** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/153210?utm_source=stack-comment-icon)
* **#153209** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/153209?utm_source=stack-comment-icon)
* **#150192** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/150192?utm_source=stack-comment-icon)
* **#149836** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/149836?utm_source=stack-comment-icon)
* `main`




This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn
more about [stacking](https://stacking.dev/?utm_source=stack-comment).


https://github.com/llvm/llvm-project/pull/153801
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang][OpenMP] Parse GROUPPRIVATE directive (PR #153807)

2025-08-15 Thread Krzysztof Parzyszek via llvm-branch-commits


https://github.com/kparzysz created 
https://github.com/llvm/llvm-project/pull/153807

No semantic checks or lowering yet.

>From ccc414db30f65308d47d2efbb3198a896bd5a67e Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Fri, 15 Aug 2025 08:12:45 -0500
Subject: [PATCH] [flang][OpenMP] Parse GROUPPRIVATE directive

No semantic checks or lowering yet.
---
 flang/include/flang/Parser/dump-parse-tree.h  |  1 +
 flang/include/flang/Parser/parse-tree.h   | 14 +++--
 flang/lib/Lower/OpenMP/OpenMP.cpp |  7 +
 flang/lib/Parser/openmp-parsers.cpp   |  8 +
 flang/lib/Parser/unparse.cpp  |  7 +
 flang/lib/Semantics/check-omp-structure.cpp   | 13 
 flang/lib/Semantics/check-omp-structure.h |  2 ++
 flang/test/Lower/OpenMP/Todo/groupprivate.f90 |  9 ++
 flang/test/Parser/OpenMP/groupprivate.f90 | 30 +++
 9 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/Todo/groupprivate.f90
 create mode 100644 flang/test/Parser/OpenMP/groupprivate.f90

diff --git a/flang/include/flang/Parser/dump-parse-tree.h 
b/flang/include/flang/Parser/dump-parse-tree.h
index 2c666a6d09a7b..8fbc6ccc639bf 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -729,6 +729,7 @@ class ParseTreeDumper {
   NODE(parser, OpenMPLoopConstruct)
   NODE(parser, OpenMPExecutableAllocate)
   NODE(parser, OpenMPAllocatorsConstruct)
+  NODE(parser, OpenMPGroupprivate)
   NODE(parser, OpenMPRequiresConstruct)
   NODE(parser, OpenMPSimpleStandaloneConstruct)
   NODE(parser, OpenMPStandaloneConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h 
b/flang/include/flang/Parser/parse-tree.h
index e72190f019dd1..ae0259fe9025e 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4943,6 +4943,15 @@ struct OpenMPDeclareSimdConstruct {
   std::tuple, OmpClauseList> t;
 };
 
+// ref: [6.0:301-303]
+//
+// groupprivate-directive ->
+//GROUPPRIVATE (variable-list-item...)  // since 6.0
+struct OpenMPGroupprivate {
+  WRAPPER_CLASS_BOILERPLATE(OpenMPGroupprivate, OmpDirectiveSpecification);
+  CharBlock source;
+};
+
 // 2.4 requires -> REQUIRES requires-clause[ [ [,] requires-clause]...]
 struct OpenMPRequiresConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPRequiresConstruct);
@@ -4970,8 +4979,9 @@ struct OpenMPDeclarativeConstruct {
   std::variant
+  OmpDeclareVariantDirective, OpenMPGroupprivate, OpenMPThreadprivate,
+  OpenMPRequiresConstruct, OpenMPUtilityConstruct,
+  OmpMetadirectiveDirective>
   u;
 };
 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fef64ccc15015..ec2ec37e623f8 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3593,6 +3593,13 @@ genOMP(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   }
 }
 
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
+   semantics::SemanticsContext &semaCtx,
+   lower::pft::Evaluation &eval,
+   const parser::OpenMPGroupprivate &directive) {
+  TODO(converter.getCurrentLocation(), "GROUPPRIVATE");
+}
+
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
diff --git a/flang/lib/Parser/openmp-parsers.cpp 
b/flang/lib/Parser/openmp-parsers.cpp
index 46b14861096f1..41c16212f5771 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -1773,6 +1773,12 @@ 
TYPE_PARSER(sourced(construct(
 verbatim("DECLARE SIMD"_tok) || verbatim("DECLARE_SIMD"_tok),
 maybe(parenthesized(name)), Parser{})))
 
+TYPE_PARSER(sourced( //
+construct(
+predicated(OmpDirectiveNameParser{},
+IsDirective(llvm::omp::Directive::OMPD_groupprivate)) >=
+Parser{})))
+
 // 2.4 Requires construct
 TYPE_PARSER(sourced(construct(
 verbatim("REQUIRES"_tok), Parser{})))
@@ -1808,6 +1814,8 @@ TYPE_PARSER(
 Parser{}) ||
 construct(
 Parser{}) ||
+construct(
+Parser{}) ||
 construct(
 Parser{}) ||
 construct(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 4f8d498972807..4294a6d491648 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2716,6 +2716,13 @@ class UnparseVisitor {
   void Unparse(const OpenMPDispatchConstruct &x) { //
 Unparse(static_cast(x));
   }
+  void Unparse(const OpenMPGroupprivate &x) {
+BeginOpenMP();
+Word("!$OMP ");
+Walk(x.v);
+Put("\n");
+EndOpenMP();
+  }
   v

[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)

2025-08-15 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/153801

>From 6e2b170ea709c205ad27b3e326a4d4ade7822f53 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 15 Aug 2025 09:45:46 +0900
Subject: [PATCH] RuntimeLibcalls: Fix building hash table with duplicate
 entries

We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.

BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.

As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.
---
 llvm/test/TableGen/RuntimeLibcallEmitter.td   |  4 +-
 .../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 76 ---
 2 files changed, 35 insertions(+), 45 deletions(-)

diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td 
b/llvm/test/TableGen/RuntimeLibcallEmitter.td
index 7c62402227f7d..2d19d534ec3ef 100644
--- a/llvm/test/TableGen/RuntimeLibcallEmitter.td
+++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td
@@ -176,9 +176,9 @@ def BlahLibrary : SystemRuntimeLibrary 
RTLIB::RuntimeLibcallsInfo::lookupLibcallImplNameImpl(StringRef Name) {
 // CHECK: static constexpr uint16_t HashTableNameToEnum[16] = {
-// CHECK: 2, // 0x00705301b8, ___memset
+// CHECK: 2,
 // CHECK: 0,
-// CHECK: 6, // 0x001417a2af, calloc
+// CHECK: 6,
 // CHECK: 0,
 // CHECK: };
 
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp 
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index c305e6323ca9d..a8ec873f4587e 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -287,13 +287,6 @@ class RuntimeLibcallEmitter {
   void run(raw_ostream &OS);
 };
 
-/// Helper struct for the name hash table.
-struct LookupEntry {
-  StringRef FuncName;
-  uint64_t Hash = 0;
-  unsigned TableValue = 0;
-};
-
 } // End anonymous namespace.
 
 void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const {
@@ -339,14 +332,17 @@ static void emitHashFunction(raw_ostream &OS) {
 /// Return the table size, maximum number of collisions for the set of hashes
 static std::pair
 computePerfectHashParameters(ArrayRef Hashes) {
-  const int SizeOverhead = 10;
-  const int NumHashes = Hashes.size();
+  // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+  const int SizeOverhead = 4;
 
   // Index derived from hash -> number of collisions.
   DenseMap Table;
 
+  unsigned NumHashes = Hashes.size();
+
   for (int MaxCollisions = 1;; ++MaxCollisions) {
-for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead * 
NumHashes;
+ N <<= 1) {
   Table.clear();
 
   bool NeedResize = false;
@@ -365,41 +361,29 @@ computePerfectHashParameters(ArrayRef Hashes) {
   }
 }
 
-static std::vector
+static std::vector
 constructPerfectHashTable(ArrayRef Keywords,
-  ArrayRef Hashes, int Size, int Collisions,
-  StringToOffsetTable &OffsetTable) {
-  DenseSet Seen;
-  std::vector Lookup(Size * Collisions);
-
-  for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-// We do not want to add repeated entries for cases with the same name, 
only
-// an entry for the first, with the name collision enum values immediately
-// following.
-if (!Seen.insert(ImplName).second)
-  continue;
-
-uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
+  ArrayRef Hashes,
+  ArrayRef TableValues, int Size,
+  int Collisions, StringToOffsetTable &OffsetTable) {
+  std::vector Lookup(Size * Collisions);
 
+  for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
 uint64_t Idx = (HashValue % static_cast(Size)) *
static_cast(Collisions);
 
 bool Found = false;
 for (int J = 0; J < Collisions; ++J) {
-  LookupEntry &Entry = Lookup[Idx + J];
-  if (Entry.TableValue == 0) {
-Entry.FuncName = ImplName;
-Entry.TableValue = LibCallImpl.getEnumVal();
-Entry.Hash = HashValue;
+  unsigned &Entry = Lookup[Idx + J];
+  if (Entry == 0) {
+Entry = TableValue;
 Found = true;
 break;
   }
 }
 
 if (!Found)
-  reportFatalInternalError("failure to hash " + ImplName);
+  reportFatalInternalError("failure to hash");
   }
 
   return Lookup;
@@ -409,15 +393,25 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 void RuntimeLibcallEmitter::emitNameMatchHashTable(
 raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
   std::vecto

[llvm-branch-commits] [flang] [flang][OpenMP] Parse GROUPPRIVATE directive (PR #153807)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:



@llvm/pr-subscribers-flang-fir-hlfir

@llvm/pr-subscribers-flang-openmp

Author: Krzysztof Parzyszek (kparzysz)


Changes

No semantic checks or lowering yet.

---
Full diff: https://github.com/llvm/llvm-project/pull/153807.diff


9 Files Affected:

- (modified) flang/include/flang/Parser/dump-parse-tree.h (+1) 
- (modified) flang/include/flang/Parser/parse-tree.h (+12-2) 
- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+7) 
- (modified) flang/lib/Parser/openmp-parsers.cpp (+8) 
- (modified) flang/lib/Parser/unparse.cpp (+7) 
- (modified) flang/lib/Semantics/check-omp-structure.cpp (+13) 
- (modified) flang/lib/Semantics/check-omp-structure.h (+2) 
- (added) flang/test/Lower/OpenMP/Todo/groupprivate.f90 (+9) 
- (added) flang/test/Parser/OpenMP/groupprivate.f90 (+30) 


```diff
diff --git a/flang/include/flang/Parser/dump-parse-tree.h 
b/flang/include/flang/Parser/dump-parse-tree.h
index 2c666a6d09a7b..8fbc6ccc639bf 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -729,6 +729,7 @@ class ParseTreeDumper {
   NODE(parser, OpenMPLoopConstruct)
   NODE(parser, OpenMPExecutableAllocate)
   NODE(parser, OpenMPAllocatorsConstruct)
+  NODE(parser, OpenMPGroupprivate)
   NODE(parser, OpenMPRequiresConstruct)
   NODE(parser, OpenMPSimpleStandaloneConstruct)
   NODE(parser, OpenMPStandaloneConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h 
b/flang/include/flang/Parser/parse-tree.h
index e72190f019dd1..ae0259fe9025e 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4943,6 +4943,15 @@ struct OpenMPDeclareSimdConstruct {
   std::tuple, OmpClauseList> t;
 };
 
+// ref: [6.0:301-303]
+//
+// groupprivate-directive ->
+//GROUPPRIVATE (variable-list-item...)  // since 6.0
+struct OpenMPGroupprivate {
+  WRAPPER_CLASS_BOILERPLATE(OpenMPGroupprivate, OmpDirectiveSpecification);
+  CharBlock source;
+};
+
 // 2.4 requires -> REQUIRES requires-clause[ [ [,] requires-clause]...]
 struct OpenMPRequiresConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPRequiresConstruct);
@@ -4970,8 +4979,9 @@ struct OpenMPDeclarativeConstruct {
   std::variant
+  OmpDeclareVariantDirective, OpenMPGroupprivate, OpenMPThreadprivate,
+  OpenMPRequiresConstruct, OpenMPUtilityConstruct,
+  OmpMetadirectiveDirective>
   u;
 };
 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fef64ccc15015..ec2ec37e623f8 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3593,6 +3593,13 @@ genOMP(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   }
 }
 
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
+   semantics::SemanticsContext &semaCtx,
+   lower::pft::Evaluation &eval,
+   const parser::OpenMPGroupprivate &directive) {
+  TODO(converter.getCurrentLocation(), "GROUPPRIVATE");
+}
+
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
diff --git a/flang/lib/Parser/openmp-parsers.cpp 
b/flang/lib/Parser/openmp-parsers.cpp
index 46b14861096f1..41c16212f5771 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -1773,6 +1773,12 @@ 
TYPE_PARSER(sourced(construct(
 verbatim("DECLARE SIMD"_tok) || verbatim("DECLARE_SIMD"_tok),
 maybe(parenthesized(name)), Parser{})))
 
+TYPE_PARSER(sourced( //
+construct(
+predicated(OmpDirectiveNameParser{},
+IsDirective(llvm::omp::Directive::OMPD_groupprivate)) >=
+Parser{})))
+
 // 2.4 Requires construct
 TYPE_PARSER(sourced(construct(
 verbatim("REQUIRES"_tok), Parser{})))
@@ -1808,6 +1814,8 @@ TYPE_PARSER(
 Parser{}) ||
 construct(
 Parser{}) ||
+construct(
+Parser{}) ||
 construct(
 Parser{}) ||
 construct(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 4f8d498972807..4294a6d491648 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2716,6 +2716,13 @@ class UnparseVisitor {
   void Unparse(const OpenMPDispatchConstruct &x) { //
 Unparse(static_cast(x));
   }
+  void Unparse(const OpenMPGroupprivate &x) {
+BeginOpenMP();
+Word("!$OMP ");
+Walk(x.v);
+Put("\n");
+EndOpenMP();
+  }
   void Unparse(const OpenMPRequiresConstruct &y) {
 BeginOpenMP();
 Word("!$OMP REQUIRES ");
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index bf126bbb0d8c1..ea8c391999331 100644
--- a/flang/lib/Semantic

[llvm-branch-commits] [flang] [flang][OpenMP] Parse GROUPPRIVATE directive (PR #153807)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-flang-parser

Author: Krzysztof Parzyszek (kparzysz)


Changes

No semantic checks or lowering yet.

---
Full diff: https://github.com/llvm/llvm-project/pull/153807.diff


9 Files Affected:

- (modified) flang/include/flang/Parser/dump-parse-tree.h (+1) 
- (modified) flang/include/flang/Parser/parse-tree.h (+12-2) 
- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+7) 
- (modified) flang/lib/Parser/openmp-parsers.cpp (+8) 
- (modified) flang/lib/Parser/unparse.cpp (+7) 
- (modified) flang/lib/Semantics/check-omp-structure.cpp (+13) 
- (modified) flang/lib/Semantics/check-omp-structure.h (+2) 
- (added) flang/test/Lower/OpenMP/Todo/groupprivate.f90 (+9) 
- (added) flang/test/Parser/OpenMP/groupprivate.f90 (+30) 


``diff
diff --git a/flang/include/flang/Parser/dump-parse-tree.h 
b/flang/include/flang/Parser/dump-parse-tree.h
index 2c666a6d09a7b..8fbc6ccc639bf 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -729,6 +729,7 @@ class ParseTreeDumper {
   NODE(parser, OpenMPLoopConstruct)
   NODE(parser, OpenMPExecutableAllocate)
   NODE(parser, OpenMPAllocatorsConstruct)
+  NODE(parser, OpenMPGroupprivate)
   NODE(parser, OpenMPRequiresConstruct)
   NODE(parser, OpenMPSimpleStandaloneConstruct)
   NODE(parser, OpenMPStandaloneConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h 
b/flang/include/flang/Parser/parse-tree.h
index e72190f019dd1..ae0259fe9025e 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4943,6 +4943,15 @@ struct OpenMPDeclareSimdConstruct {
   std::tuple, OmpClauseList> t;
 };
 
+// ref: [6.0:301-303]
+//
+// groupprivate-directive ->
+//GROUPPRIVATE (variable-list-item...)  // since 6.0
+struct OpenMPGroupprivate {
+  WRAPPER_CLASS_BOILERPLATE(OpenMPGroupprivate, OmpDirectiveSpecification);
+  CharBlock source;
+};
+
 // 2.4 requires -> REQUIRES requires-clause[ [ [,] requires-clause]...]
 struct OpenMPRequiresConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPRequiresConstruct);
@@ -4970,8 +4979,9 @@ struct OpenMPDeclarativeConstruct {
   std::variant
+  OmpDeclareVariantDirective, OpenMPGroupprivate, OpenMPThreadprivate,
+  OpenMPRequiresConstruct, OpenMPUtilityConstruct,
+  OmpMetadirectiveDirective>
   u;
 };
 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fef64ccc15015..ec2ec37e623f8 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3593,6 +3593,13 @@ genOMP(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   }
 }
 
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
+   semantics::SemanticsContext &semaCtx,
+   lower::pft::Evaluation &eval,
+   const parser::OpenMPGroupprivate &directive) {
+  TODO(converter.getCurrentLocation(), "GROUPPRIVATE");
+}
+
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
diff --git a/flang/lib/Parser/openmp-parsers.cpp 
b/flang/lib/Parser/openmp-parsers.cpp
index 46b14861096f1..41c16212f5771 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -1773,6 +1773,12 @@ 
TYPE_PARSER(sourced(construct(
 verbatim("DECLARE SIMD"_tok) || verbatim("DECLARE_SIMD"_tok),
 maybe(parenthesized(name)), Parser{})))
 
+TYPE_PARSER(sourced( //
+construct(
+predicated(OmpDirectiveNameParser{},
+IsDirective(llvm::omp::Directive::OMPD_groupprivate)) >=
+Parser{})))
+
 // 2.4 Requires construct
 TYPE_PARSER(sourced(construct(
 verbatim("REQUIRES"_tok), Parser{})))
@@ -1808,6 +1814,8 @@ TYPE_PARSER(
 Parser{}) ||
 construct(
 Parser{}) ||
+construct(
+Parser{}) ||
 construct(
 Parser{}) ||
 construct(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 4f8d498972807..4294a6d491648 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2716,6 +2716,13 @@ class UnparseVisitor {
   void Unparse(const OpenMPDispatchConstruct &x) { //
 Unparse(static_cast(x));
   }
+  void Unparse(const OpenMPGroupprivate &x) {
+BeginOpenMP();
+Word("!$OMP ");
+Walk(x.v);
+Put("\n");
+EndOpenMP();
+  }
   void Unparse(const OpenMPRequiresConstruct &y) {
 BeginOpenMP();
 Word("!$OMP REQUIRES ");
diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp
index bf126bbb0d8c1..ea8c391999331 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang

[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)

2025-08-15 Thread Matt Arsenault via llvm-branch-commits


https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/153801

>From 6e2b170ea709c205ad27b3e326a4d4ade7822f53 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 15 Aug 2025 09:45:46 +0900
Subject: [PATCH] RuntimeLibcalls: Fix building hash table with duplicate
 entries

We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.

BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.

As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.
---
 llvm/test/TableGen/RuntimeLibcallEmitter.td   |  4 +-
 .../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 76 ---
 2 files changed, 35 insertions(+), 45 deletions(-)

diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td 
b/llvm/test/TableGen/RuntimeLibcallEmitter.td
index 7c62402227f7d..2d19d534ec3ef 100644
--- a/llvm/test/TableGen/RuntimeLibcallEmitter.td
+++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td
@@ -176,9 +176,9 @@ def BlahLibrary : SystemRuntimeLibrary 
RTLIB::RuntimeLibcallsInfo::lookupLibcallImplNameImpl(StringRef Name) {
 // CHECK: static constexpr uint16_t HashTableNameToEnum[16] = {
-// CHECK: 2, // 0x00705301b8, ___memset
+// CHECK: 2,
 // CHECK: 0,
-// CHECK: 6, // 0x001417a2af, calloc
+// CHECK: 6,
 // CHECK: 0,
 // CHECK: };
 
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp 
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index c305e6323ca9d..a8ec873f4587e 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -287,13 +287,6 @@ class RuntimeLibcallEmitter {
   void run(raw_ostream &OS);
 };
 
-/// Helper struct for the name hash table.
-struct LookupEntry {
-  StringRef FuncName;
-  uint64_t Hash = 0;
-  unsigned TableValue = 0;
-};
-
 } // End anonymous namespace.
 
 void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const {
@@ -339,14 +332,17 @@ static void emitHashFunction(raw_ostream &OS) {
 /// Return the table size, maximum number of collisions for the set of hashes
 static std::pair
 computePerfectHashParameters(ArrayRef Hashes) {
-  const int SizeOverhead = 10;
-  const int NumHashes = Hashes.size();
+  // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+  const int SizeOverhead = 4;
 
   // Index derived from hash -> number of collisions.
   DenseMap Table;
 
+  unsigned NumHashes = Hashes.size();
+
   for (int MaxCollisions = 1;; ++MaxCollisions) {
-for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead * 
NumHashes;
+ N <<= 1) {
   Table.clear();
 
   bool NeedResize = false;
@@ -365,41 +361,29 @@ computePerfectHashParameters(ArrayRef Hashes) {
   }
 }
 
-static std::vector
+static std::vector
 constructPerfectHashTable(ArrayRef Keywords,
-  ArrayRef Hashes, int Size, int Collisions,
-  StringToOffsetTable &OffsetTable) {
-  DenseSet Seen;
-  std::vector Lookup(Size * Collisions);
-
-  for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-// We do not want to add repeated entries for cases with the same name, 
only
-// an entry for the first, with the name collision enum values immediately
-// following.
-if (!Seen.insert(ImplName).second)
-  continue;
-
-uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
+  ArrayRef Hashes,
+  ArrayRef TableValues, int Size,
+  int Collisions, StringToOffsetTable &OffsetTable) {
+  std::vector Lookup(Size * Collisions);
 
+  for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
 uint64_t Idx = (HashValue % static_cast(Size)) *
static_cast(Collisions);
 
 bool Found = false;
 for (int J = 0; J < Collisions; ++J) {
-  LookupEntry &Entry = Lookup[Idx + J];
-  if (Entry.TableValue == 0) {
-Entry.FuncName = ImplName;
-Entry.TableValue = LibCallImpl.getEnumVal();
-Entry.Hash = HashValue;
+  unsigned &Entry = Lookup[Idx + J];
+  if (Entry == 0) {
+Entry = TableValue;
 Found = true;
 break;
   }
 }
 
 if (!Found)
-  reportFatalInternalError("failure to hash " + ImplName);
+  reportFatalInternalError("failure to hash");
   }
 
   return Lookup;
@@ -409,15 +393,25 @@ constructPerfectHashTable(ArrayRef 
Keywords,
 void RuntimeLibcallEmitter::emitNameMatchHashTable(
 raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
   std::vecto

[llvm-branch-commits] [clang] [LifetimeSafety] Prevent duplicate loans and statement visits (PR #153661)

2025-08-15 Thread Gábor Horváth via llvm-branch-commits


https://github.com/Xazax-hun commented:

I am wondering if this is the right approach. If everything works out well, 
every time we call `Visit` on an expression, there should be a guarantee we 
already visited all the subexpressions of it (modulo some corner cases with 
short circuiting operators, ternaries and trivially false branches). So, we 
might be able to structure the code in a way that we never need to call `Visit` 
recursively for a subexpression, and we do not need to keep a `VisitedStmts` 
set. 

But in case that does not work out for some reason I am also fine with this 
approach. 

https://github.com/llvm/llvm-project/pull/153661
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang][PAC][ObjC] Merge the block metadata support for the arm64e abi to llvm 21 (PR #153725)

2025-08-15 Thread Aaron Ballman via llvm-branch-commits



@@ -155,6 +155,7 @@ FEATURE(ptrauth_vtable_pointer_address_discrimination, 
LangOpts.PointerAuthVTPtr
 FEATURE(ptrauth_vtable_pointer_type_discrimination, 
LangOpts.PointerAuthVTPtrTypeDiscrimination)
 FEATURE(ptrauth_type_info_vtable_pointer_discrimination, 
LangOpts.PointerAuthTypeInfoVTPtrDiscrimination)
 FEATURE(ptrauth_member_function_pointer_type_discrimination, 
LangOpts.PointerAuthCalls)
+FEATURE(ptrauth_signed_block_descriptors, 
LangOpts.PointerAuthBlockDescriptorPointers)

AaronBallman wrote:

This follows the pattern of the other ptrauth work but none of these should 
have been exposed as features to begin with...

https://github.com/llvm/llvm-project/pull/153725
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang][PAC][ObjC] Merge the block metadata support for the arm64e abi to llvm 21 (PR #153725)

2025-08-15 Thread Aaron Ballman via llvm-branch-commits


https://github.com/AaronBallman edited 
https://github.com/llvm/llvm-project/pull/153725
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang][PAC][ObjC] Merge the block metadata support for the arm64e abi to llvm 21 (PR #153725)

2025-08-15 Thread Aaron Ballman via llvm-branch-commits


https://github.com/AaronBallman commented:

Not opposed, but this is a pretty significant amount of change for this late 
in the RC cycle, and the changes haven't been upstream for very long. How 
risky are these changes?

https://github.com/llvm/llvm-project/pull/153725
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --filter-out-after "^middle.block:" --filter-out-after "^scalar.ph:" 
--version 4
+; RUN: opt -S -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 
-passes=loop-vectorize,instcombine,early-cse 
-prefer-predicate-over-epilogue=predicate-dont-vectorize 
-force-vector-interleave=1 %s | FileCheck %s
+
+define dso_local void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i64 %n) {

SamTebbs33 wrote:

Done, let me know if anything needs to be added or changed with them.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -2030,32 +2031,68 @@ Value *llvm::addDiffRuntimeChecks(
   // Map to keep track of created compares, The key is the pair of operands for
   // the compare, to allow detecting and re-using redundant compares.
   DenseMap, Value *> SeenCompares;
-  for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
+  Value *AliasLaneMask = nullptr;
+  for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze,
+WriteAfterRead] : Checks) {
 Type *Ty = SinkStart->getType();
-// Compute VF * IC * AccessSize.
-auto *VFTimesICTimesSize =
-ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
- ConstantInt::get(Ty, IC * AccessSize));
-Value *Diff =
-Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
-
-// Check if the same compare has already been created earlier. In that 
case,
-// there is no need to check it again.
-Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
-if (IsConflict)
-  continue;
+if (!VF.isScalar() && UseSafeEltsMask) {
+  Value *Sink = Expander.expandCodeFor(SinkStart, Ty, Loc);
+  Value *Src = Expander.expandCodeFor(SrcStart, Ty, Loc);
+  unsigned IntOpc = WriteAfterRead ? Intrinsic::loop_dependence_war_mask
+   : Intrinsic::loop_dependence_raw_mask;
+  Value *SourceAsPtr = ChkBuilder.CreateCast(Instruction::IntToPtr, Src,
+ ChkBuilder.getPtrTy());
+  Value *SinkAsPtr = ChkBuilder.CreateCast(Instruction::IntToPtr, Sink,
+   ChkBuilder.getPtrTy());
+  Value *M = ChkBuilder.CreateIntrinsic(
+  IntOpc, {VectorType::get(ChkBuilder.getInt1Ty(), VF)},
+  {SourceAsPtr, SinkAsPtr, ChkBuilder.getInt64(AccessSize)}, nullptr,
+  "alias.lane.mask");
+  if (AliasLaneMask)
+M = ChkBuilder.CreateAnd(AliasLaneMask, M);
+  else
+AliasLaneMask = M;
+} else {
+  // Compute VF * IC * AccessSize.
+  auto *VFTimesICTimesSize =
+  ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
+   ConstantInt::get(Ty, IC * AccessSize));
+  Value *Diff =
+  Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, 
Loc);
+
+  // Check if the same compare has already been created earlier. In that
+  // case, there is no need to check it again.
+  Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});

SamTebbs33 wrote:

That should help, done.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -2063,6 +2080,12 @@ static bool 
useActiveLaneMaskForControlFlow(TailFoldingStyle Style) {
  Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
 }
 
+static bool useSafeEltsMask(TailFoldingStyle TFStyle, RTCheckStyle Style,
+ElementCount VF, const TargetTransformInfo &TTI) {
+  return useActiveLaneMask(TFStyle) && Style == RTCheckStyle::UseSafeEltsMask 
&&
+ TTI.useSafeEltsMask(VF);
+}
+

SamTebbs33 wrote:

Done.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --filter-out-after "^middle.block:" --filter-out-after "^scalar.ph:" 
--version 4
+; RUN: opt -S -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 
-passes=loop-vectorize,instcombine,early-cse 
-prefer-predicate-over-epilogue=predicate-dont-vectorize 
-force-vector-interleave=1 %s | FileCheck %s

SamTebbs33 wrote:

Sounds sensible to me, done.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -1347,6 +1354,11 @@ class TargetTransformInfo {
   PartialReductionExtendKind OpBExtend, std::optional BinOp,
   TTI::TargetCostKind CostKind) const;
 
+  /// \return true if a mask should be formed that disables lanes that could
+  /// alias between two pointers. The mask is created by the
+  /// loop_dependence_{war,raw}_mask intrinsics.
+  LLVM_ABI bool useSafeEltsMask(ElementCount VF) const;

SamTebbs33 wrote:

That would work if I return invalid for the cost, but @sdesmalen-arm has 
suggested I calculate the cost of the expanded intrinsic instead of returning 
invalid: https://github.com/llvm/llvm-project/pull/100579/files#r2262969228

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -482,11 +482,14 @@ bool RuntimePointerChecking::tryToCreateDiffCheck(
 }
   }
 
+  bool WriteAfterRead = isa(SrcInsts[0]);

SamTebbs33 wrote:

That's better, thanks.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -974,6 +974,11 @@ AArch64TTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
 }
 break;
   }
+  case Intrinsic::loop_dependence_raw_mask:
+  case Intrinsic::loop_dependence_war_mask:
+if (ST->hasSVE2())

SamTebbs33 wrote:

Done.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -974,6 +974,11 @@ AArch64TTIImpl::getIntrinsicInstrCost(const 
IntrinsicCostAttributes &ICA,
 }
 break;
   }
+  case Intrinsic::loop_dependence_raw_mask:
+  case Intrinsic::loop_dependence_war_mask:
+if (ST->hasSVE2())
+  return 1;
+return InstructionCost::getInvalid(CostKind);

SamTebbs33 wrote:

It will now get the expanded intrinsic cost instead of returning invalid.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -5535,6 +5540,11 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
   return Cost;
 }
 
+bool AArch64TTIImpl::useSafeEltsMask(ElementCount VF) const {
+  // The whilewr/rw instructions require SVE2
+  return ST->hasSVE2();

SamTebbs33 wrote:

Done.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -201,6 +201,13 @@ enum class TailFoldingStyle {
   DataWithEVL,
 };
 
+enum class RTCheckStyle {
+  /// Create runtime checks based on the difference between two pointers
+  ScalarDifference,
+  /// Form a mask based on elements which won't be a WAR or RAW hazard.
+  UseSafeEltsMask,

SamTebbs33 wrote:

I think `NoUnsafeAliasMask` could sound like it means there shouldn't be a 
mask, i.e. `No{...}Mask`.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -2030,32 +2031,68 @@ Value *llvm::addDiffRuntimeChecks(
   // Map to keep track of created compares, The key is the pair of operands for
   // the compare, to allow detecting and re-using redundant compares.
   DenseMap, Value *> SeenCompares;
-  for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
+  Value *AliasLaneMask = nullptr;
+  for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze,
+WriteAfterRead] : Checks) {
 Type *Ty = SinkStart->getType();
-// Compute VF * IC * AccessSize.
-auto *VFTimesICTimesSize =
-ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
- ConstantInt::get(Ty, IC * AccessSize));
-Value *Diff =
-Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
-
-// Check if the same compare has already been created earlier. In that 
case,
-// there is no need to check it again.
-Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
-if (IsConflict)
-  continue;
+if (!VF.isScalar() && UseSafeEltsMask) {

SamTebbs33 wrote:

I think that `VF.isScalar()` is actually an unnecessary check, since the memory 
check block is only used if the LV actually ends up vectorising or if epilogue 
vectorisation is on. In the case of epilogue vectorisation, it won't use tail 
predication, and tail predication being off turns off `UseSafeEltsMask`.

As per Florian's suggestion I've separated this into two functions.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -1347,6 +1354,11 @@ class TargetTransformInfo {
   PartialReductionExtendKind OpBExtend, std::optional BinOp,
   TTI::TargetCostKind CostKind) const;
 
+  /// \return true if a mask should be formed that disables lanes that could
+  /// alias between two pointers. The mask is created by the
+  /// loop_dependence_{war,raw}_mask intrinsics.
+  LLVM_ABI bool useSafeEltsMask(ElementCount VF) const;

SamTebbs33 wrote:

That's a holdover from a previous prototype; removed.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)

2025-08-15 Thread Sam Tebbs via llvm-branch-commits



@@ -2421,7 +2444,6 @@ void 
InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
   LoopVectorPreHeader = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(),
static_cast(nullptr), LI,
nullptr, "vector.ph");
-

SamTebbs33 wrote:

Done.

https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec updated 
https://github.com/llvm/llvm-project/pull/153881

>From 4ee7e8bf3a0cd4036b601a12bbb5bba61deda993 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Fri, 15 Aug 2025 14:18:56 -0700
Subject: [PATCH] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI.

Co-authored-by: Stephen Thomas 
---
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index fa3ca27a5f47c..49a681efc79c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1749,7 +1749,7 @@ bool 
GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
 
   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
   TII.get(AMDGPU::S_WAITCNT_DEPCTR))
-  .addImm(0x0fff);
+  .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));
 
   return true;
 }
@@ -1799,7 +1799,7 @@ bool 
GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
 if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
 SIInstrInfo::isEXP(I) ||
 (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- I.getOperand(0).getImm() == 0x0fff))
+ AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
   return HazardExpired;
 
 // Track registers writes

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec updated 
https://github.com/llvm/llvm-project/pull/153880

>From df0ab0abe2132a729ec1ad18f20faa9b804f0a6f Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Fri, 15 Aug 2025 14:10:55 -0700
Subject: [PATCH] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12
 waitcount instructions

This simply makes the pass aware of these instructions. For the most part, the
hazards where they might be encountered do not exist for gfx12. Nonetheless,
we still have to check for them, as encountering one would indicate a compiler
error.

Co-authored-by: Stephen Thomas 
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c1cca063aac6f..fa3ca27a5f47c 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1357,6 +1357,16 @@ bool 
GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
 // DsCnt corresponds to LGKMCnt here.
 return (Decoded.DsCnt == 0);
   }
+  case AMDGPU::S_WAIT_STORECNT:
+  case AMDGPU::S_WAIT_STORECNT_DSCNT:
+  case AMDGPU::S_WAIT_LOADCNT:
+  case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+  case AMDGPU::S_WAIT_SAMPLECNT:
+  case AMDGPU::S_WAIT_BVHCNT:
+  case AMDGPU::S_WAIT_DSCNT:
+  case AMDGPU::S_WAIT_EXPCNT:
+  case AMDGPU::S_WAIT_KMCNT:
+llvm_unreachable("unexpected wait count instruction");
   default:
 // SOPP instructions cannot mitigate the hazard.
 if (TII->isSOPP(MI))
@@ -2254,6 +2264,15 @@ int 
GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
 case AMDGPU::S_WAITCNT_EXPCNT:
 case AMDGPU::S_WAITCNT_LGKMCNT:
 case AMDGPU::S_WAIT_IDLE:
+case AMDGPU::S_WAIT_LOADCNT:
+case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+case AMDGPU::S_WAIT_SAMPLECNT:
+case AMDGPU::S_WAIT_BVHCNT:
+case AMDGPU::S_WAIT_STORECNT:
+case AMDGPU::S_WAIT_STORECNT_DSCNT:
+case AMDGPU::S_WAIT_EXPCNT:
+case AMDGPU::S_WAIT_DSCNT:
+case AMDGPU::S_WAIT_KMCNT:
   return true;
 default:
   break;

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec updated 
https://github.com/llvm/llvm-project/pull/153880

>From df0ab0abe2132a729ec1ad18f20faa9b804f0a6f Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Fri, 15 Aug 2025 14:10:55 -0700
Subject: [PATCH] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12
 waitcount instructions

This simply updates the pass's cognizance of these instructions, and for the
most part the hazards where they might be encountered do not exist for gfx12.
Nonetheless, encountering them has to be checked for as doing so would indicate
a compiler error.

Co-authored-by: Stephen Thomas 
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c1cca063aac6f..fa3ca27a5f47c 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1357,6 +1357,16 @@ bool 
GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
 // DsCnt corresponds to LGKMCnt here.
 return (Decoded.DsCnt == 0);
   }
+  case AMDGPU::S_WAIT_STORECNT:
+  case AMDGPU::S_WAIT_STORECNT_DSCNT:
+  case AMDGPU::S_WAIT_LOADCNT:
+  case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+  case AMDGPU::S_WAIT_SAMPLECNT:
+  case AMDGPU::S_WAIT_BVHCNT:
+  case AMDGPU::S_WAIT_DSCNT:
+  case AMDGPU::S_WAIT_EXPCNT:
+  case AMDGPU::S_WAIT_KMCNT:
+llvm_unreachable("unexpected wait count instruction");
   default:
 // SOPP instructions cannot mitigate the hazard.
 if (TII->isSOPP(MI))
@@ -2254,6 +2264,15 @@ int 
GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
 case AMDGPU::S_WAITCNT_EXPCNT:
 case AMDGPU::S_WAITCNT_LGKMCNT:
 case AMDGPU::S_WAIT_IDLE:
+case AMDGPU::S_WAIT_LOADCNT:
+case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+case AMDGPU::S_WAIT_SAMPLECNT:
+case AMDGPU::S_WAIT_BVHCNT:
+case AMDGPU::S_WAIT_STORECNT:
+case AMDGPU::S_WAIT_STORECNT_DSCNT:
+case AMDGPU::S_WAIT_EXPCNT:
+case AMDGPU::S_WAIT_DSCNT:
+case AMDGPU::S_WAIT_KMCNT:
   return true;
 default:
   break;

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec updated 
https://github.com/llvm/llvm-project/pull/153881

>From 4ee7e8bf3a0cd4036b601a12bbb5bba61deda993 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Fri, 15 Aug 2025 14:18:56 -0700
Subject: [PATCH] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI.

Co-authored-by: Stephen Thomas 
---
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index fa3ca27a5f47c..49a681efc79c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1749,7 +1749,7 @@ bool 
GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
 
   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
   TII.get(AMDGPU::S_WAITCNT_DEPCTR))
-  .addImm(0x0fff);
+  .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));
 
   return true;
 }
@@ -1799,7 +1799,7 @@ bool 
GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
 if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
 SIInstrInfo::isEXP(I) ||
 (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- I.getOperand(0).getImm() == 0x0fff))
+ AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
   return HazardExpired;
 
 // Track registers writes

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] 7aa3dbc - Revert "Remember LLVM_ENABLE_LIBCXX setting in installed configuration (#139712)"

2025-08-15 Thread via llvm-branch-commits


Author: gulfemsavrun
Date: 2025-08-15T16:00:12-07:00
New Revision: 7aa3dbcae91ade86e362fa6ef6a739f839490cbd

URL: 
https://github.com/llvm/llvm-project/commit/7aa3dbcae91ade86e362fa6ef6a739f839490cbd
DIFF: 
https://github.com/llvm/llvm-project/commit/7aa3dbcae91ade86e362fa6ef6a739f839490cbd.diff

LOG: Revert "Remember LLVM_ENABLE_LIBCXX setting in installed configuration 
(#139712)"

This reverts commit b010b7ea89fdb870024b94913b2b784ce1f4f8d4.

Added: 


Modified: 
llvm/cmake/modules/HandleLLVMStdlib.cmake
llvm/cmake/modules/LLVMConfig.cmake.in

Removed: 




diff  --git a/llvm/cmake/modules/HandleLLVMStdlib.cmake 
b/llvm/cmake/modules/HandleLLVMStdlib.cmake
index dda1caa846dcb..a7e138aa0789b 100644
--- a/llvm/cmake/modules/HandleLLVMStdlib.cmake
+++ b/llvm/cmake/modules/HandleLLVMStdlib.cmake
@@ -2,7 +2,6 @@
 # if the user has requested it.
 
 include(DetermineGCCCompatible)
-include(CheckIncludeFiles)
 
 if(NOT DEFINED LLVM_STDLIB_HANDLED)
   set(LLVM_STDLIB_HANDLED ON)
@@ -20,17 +19,7 @@ if(NOT DEFINED LLVM_STDLIB_HANDLED)
 if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
   check_cxx_compiler_flag("-stdlib=libc++" CXX_COMPILER_SUPPORTS_STDLIB)
   check_linker_flag(CXX "-stdlib=libc++" CXX_LINKER_SUPPORTS_STDLIB)
-
-  # Check whether C++ include files are available
-  # runtimes/CMakeLists.txt adds -nostdlib++ and -nostdinc++ to
-  # CMAKE_REQUIRED_FLAGS, which are incompatible with -stdlib=libc++; use
-  # a fresh CMAKE_REQUIRED_FLAGS environment.
-  cmake_push_check_state(RESET)
-  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -stdlib=libc++")
-  check_include_files("chrono" CXX_COMPILER_SUPPORTS_STDLIB_CHRONO 
LANGUAGE CXX)
-  cmake_pop_check_state()
-
-  if(CXX_COMPILER_SUPPORTS_STDLIB AND CXX_LINKER_SUPPORTS_STDLIB AND 
CXX_COMPILER_SUPPORTS_STDLIB_CHRONO)
+  if(CXX_COMPILER_SUPPORTS_STDLIB AND CXX_LINKER_SUPPORTS_STDLIB)
 append("-stdlib=libc++"
   CMAKE_CXX_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS
   CMAKE_MODULE_LINKER_FLAGS)

diff  --git a/llvm/cmake/modules/LLVMConfig.cmake.in 
b/llvm/cmake/modules/LLVMConfig.cmake.in
index c39c33f0c7793..c15b9576cd5d5 100644
--- a/llvm/cmake/modules/LLVMConfig.cmake.in
+++ b/llvm/cmake/modules/LLVMConfig.cmake.in
@@ -55,8 +55,6 @@ endif()
 
 set(LLVM_ENABLE_RTTI @LLVM_ENABLE_RTTI@)
 
-set(LLVM_ENABLE_LIBCXX @LLVM_ENABLE_LIBCXX@)
-
 set(LLVM_ENABLE_LIBEDIT @HAVE_LIBEDIT@)
 if(LLVM_ENABLE_LIBEDIT)
   find_package(LibEdit)



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [CAS] Add MappedFileRegionBumpPtr (PR #114099)

2025-08-15 Thread Steven Wu via llvm-branch-commits


https://github.com/cachemeifyoucan updated 
https://github.com/llvm/llvm-project/pull/114099


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [CAS] Add MappedFileRegionBumpPtr (PR #114099)

2025-08-15 Thread Steven Wu via llvm-branch-commits


https://github.com/cachemeifyoucan updated 
https://github.com/llvm/llvm-project/pull/114099


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)

2025-08-15 Thread Changpeng Fang via llvm-branch-commits


https://github.com/changpeng approved this pull request.


https://github.com/llvm/llvm-project/pull/153881
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)

2025-08-15 Thread Changpeng Fang via llvm-branch-commits


https://github.com/changpeng approved this pull request.


https://github.com/llvm/llvm-project/pull/153880
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (PR #153879)

2025-08-15 Thread Changpeng Fang via llvm-branch-commits


https://github.com/changpeng approved this pull request.


https://github.com/llvm/llvm-project/pull/153879
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [RISCV] Track Linker Relaxable through Assembly Relaxation (PR #153670)

2025-08-15 Thread Sam Elliott via llvm-branch-commits


https://github.com/lenary edited 
https://github.com/llvm/llvm-project/pull/153670
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [Github] Drop llvm-project-tests (PR #153877)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-github-workflow

Author: Aiden Grossman (boomanaiden154)


Changes

All users of this have been cleaned up so we can now drop it fully.


---
Full diff: https://github.com/llvm/llvm-project/pull/153877.diff


2 Files Affected:

- (removed) .github/workflows/llvm-project-tests.yml (-149) 
- (removed) .github/workflows/llvm-project-workflow-tests.yml (-32) 


``diff
diff --git a/.github/workflows/llvm-project-tests.yml 
b/.github/workflows/llvm-project-tests.yml
deleted file mode 100644
index 8621a3b59218e..0
--- a/.github/workflows/llvm-project-tests.yml
+++ /dev/null
@@ -1,149 +0,0 @@
-name: LLVM Project Tests
-
-permissions:
-  contents: read
-
-on:
-  workflow_dispatch:
-inputs:
-  build_target:
-required: false
-  projects:
-required: false
-  extra_cmake_args:
-required: false
-  os_list:
-required: false
-default: '["ubuntu-24.04", "windows-2019", "macOS-13"]'
-  python_version:
-required: false
-type: string
-default: '3.11'
-  workflow_call:
-inputs:
-  build_target:
-required: false
-type: string
-default: "all"
-
-  projects:
-required: true
-type: string
-
-  extra_cmake_args:
-required: false
-type: string
-
-  os_list:
-required: false
-type: string
-# Use windows-2019 due to:
-# 
https://developercommunity.visualstudio.com/t/Prev-Issue---with-__assume-isnan-/1597317
-default: '["ubuntu-24.04", "windows-2019", "macOS-13"]'
-
-  python_version:
-required: false
-type: string
-default: '3.11'
-
-concurrency:
-  # Skip intermediate builds: always.
-  # Cancel intermediate builds: only if it is a pull request build.
-  # If the group name here is the same as the group name in the workflow that 
includes
-  # this one, then the action will try to wait on itself and get stuck.
-  group: llvm-project-${{ github.workflow }}-${{ inputs.projects }}-${{ 
inputs.python_version }}${{ github.ref }}
-  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
-
-jobs:
-  lit-tests:
-name: Lit Tests
-runs-on: ${{ matrix.os }}
-container:
-  image: ${{(startsWith(matrix.os, 'ubuntu') && 
'ghcr.io/llvm/ci-ubuntu-24.04:latest') || null}}
-  volumes:
-- /mnt/:/mnt/
-strategy:
-  fail-fast: false
-  matrix:
-os: ${{ fromJSON(inputs.os_list) }}
-steps:
-  - name: Setup Windows
-if: startsWith(matrix.os, 'windows')
-uses: llvm/actions/setup-windows@main
-with:
-  arch: amd64
-  # On Windows, starting with win19/20220814.1, cmake choose the 32-bit
-  # python3.10.6 libraries instead of the 64-bit libraries when building
-  # lldb.  Using this setup-python action to make 3.10 the default
-  # python fixes this.
-  - name: Setup Python
-uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 
v5.4.0
-with:
-  python-version: ${{ inputs.python_version }}
-  - name: Install Ninja
-if: runner.os != 'Linux'
-uses: llvm/actions/install-ninja@main
-  # actions/checkout deletes any existing files in the new git directory,
-  # so this needs to either run before ccache-action or it has to use
-  # clean: false.
-  - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 
v5.0.0
-with:
-  fetch-depth: 250
-  - name: Setup ccache
-uses: 
hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17
-with:
-  # A full build of llvm, clang, lld, and lldb takes about 250MB
-  # of ccache space. There's not much reason to have more than this,
-  # because we usually won't need to save cache entries from older
-  # builds.  Also, there is an overall 10GB cache limit, and each
-  # run creates a new cache entry so we want to ensure that we have
-  # enough cache space for all the tests to run at once and still
-  # fit under the 10 GB limit.
-  # Default to 2G to workaround: 
https://github.com/hendrikmuhs/ccache-action/issues/174
-  max-size: 2G
-  key: ${{ matrix.os }}
-  variant: sccache
-  - name: Build and Test
-env:
-  # Workaround for 
https://github.com/actions/virtual-environments/issues/5900.
-  # This should be a no-op for non-mac OSes
-  PKG_CONFIG_PATH: 
/usr/local/Homebrew/Library/Homebrew/os/mac/pkgconfig//12
-shell: bash
-id: build-llvm
-run: |
-  if [ "${{ runner.os }}" == "Linux" ]; then
-builddir="/mnt/build/"
-sudo mkdir -p $builddir
-sudo chown gha $builddir
-extra_cmake_args="-DCMAKE_CXX_COMPILER=clang++ 
-DCMAKE_C_COMPILER=clang"
-  else
-builddir="$(pwd)"/build

[llvm-branch-commits] [Github] Remove call to llvm-project-tests from libclang tests (PR #153876)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-github-workflow

Author: Aiden Grossman (boomanaiden154)


Changes

This allows for removing llvm-project-tests.yml. This significantly
reduces the complexity of this workflow (including the complexity of
llvm-project-tests.yml) at the cost of a little bit of duplication with
the other workflows that were also using llvm-project-tests.yml.


---
Full diff: https://github.com/llvm/llvm-project/pull/153876.diff


1 Files Affected:

- (modified) .github/workflows/libclang-python-tests.yml (+27-8) 


``diff
diff --git a/.github/workflows/libclang-python-tests.yml 
b/.github/workflows/libclang-python-tests.yml
index 50ef4acf2feb1..edd2f774621b6 100644
--- a/.github/workflows/libclang-python-tests.yml
+++ b/.github/workflows/libclang-python-tests.yml
@@ -25,17 +25,36 @@ on:
 jobs:
   check-clang-python:
 # Build libclang and then run the libclang Python binding's unit tests.
+# There is an issue running on "windows-2019".
+# See 
https://github.com/llvm/llvm-project/issues/76601#issuecomment-1873049082.
 name: Build and run Python unit tests
 if: github.repository == 'llvm/llvm-project'
+runs-on: ubuntu-24.04
 strategy:
   fail-fast: false
   matrix:
 python-version: ["3.8", "3.13"]
-uses: ./.github/workflows/llvm-project-tests.yml
-with:
-  build_target: check-clang-python
-  projects: clang
-  # There is an issue running on "windows-2019".
-  # See 
https://github.com/llvm/llvm-project/issues/76601#issuecomment-1873049082.
-  os_list: '["ubuntu-24.04"]'
-  python_version: ${{ matrix.python-version }}
+steps:
+  - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 
v5.0.0
+  - name: Setup Python
+uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 
v5.4.0
+with:
+  python-version: ${{ matrix.python_version }}
+  - name: Setup ccache
+uses: 
hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17
+with:
+  max-size: 2G
+  key: spirv-ubuntu-24.04
+  variant: sccache
+  - name: Build and Test
+run: |
+  mkdir build
+  cmake -GNinja \
+-S llvm \
+-B build \
+-DCMAKE_BUILD_TYPE=Release \
+-DLLVM_ENABLE_ASSERTIONS=ON \
+-DCMAKE_C_COMPILER_LAUNCHER=sccache \
+-DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
+-DLLVM_ENABLE_PROJECTS=clang
+  ninja -C build check-clang-python

``




https://github.com/llvm/llvm-project/pull/153876
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [Github] Drop llvm-project-tests (PR #153877)

2025-08-15 Thread Aiden Grossman via llvm-branch-commits


https://github.com/boomanaiden154 created 
https://github.com/llvm/llvm-project/pull/153877

All users of this have been cleaned up so we can now drop it fully.



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)

2025-08-15 Thread Peter Collingbourne via llvm-branch-commits


https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/153873


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)

2025-08-15 Thread Peter Collingbourne via llvm-branch-commits


https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/153873


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (PR #153879)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec created 
https://github.com/llvm/llvm-project/pull/153879

None

>From 32fc4952dafa723bdff1f26f717b87cd8f4464b1 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Fri, 15 Aug 2025 14:03:15 -0700
Subject: [PATCH] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE
 register

---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 12 +
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h  |  1 +
 llvm/lib/Target/AMDGPU/GCNSubtarget.h |  4 ++
 llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir  | 54 +++
 4 files changed, 71 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index dd7c1914d3440..c1cca063aac6f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1206,6 +1206,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
 fixDsAtomicAsyncBarrierArriveB64(MI);
   if (ST.hasScratchBaseForwardingHazard())
 fixScratchBaseForwardingHazard(MI);
+  if (ST.setRegModeNeedsVNOPs())
+fixSetRegMode(MI);
 }
 
 static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo 
&TRI,
@@ -3546,3 +3548,13 @@ bool 
GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) {
   AMDGPU::DepCtr::encodeFieldSaSdst(0), 0));
   return true;
 }
+
+bool GCNHazardRecognizer::fixSetRegMode(MachineInstr *MI) {
+  if (!isSSetReg(MI->getOpcode()) ||
+  MI->getOperand(1).getImm() != AMDGPU::Hwreg::ID_MODE)
+return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index e0982b46424b9..67beffadc0913 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -113,6 +113,7 @@ class GCNHazardRecognizer final : public 
ScheduleHazardRecognizer {
   bool fixGetRegWaitIdle(MachineInstr *MI);
   bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
   bool fixScratchBaseForwardingHazard(MachineInstr *MI);
+  bool fixSetRegMode(MachineInstr *MI);
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h 
b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 404a476a3076a..2a8385df3f934 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1345,6 +1345,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
 
+  bool setRegModeNeedsVNOPs() const {
+return GFX1250Insts && getGeneration() == GFX12;
+  }
+
   /// Return if operations acting on VGPR tuples require even alignment.
   bool needsAlignedVGPRs() const { return GFX90AInsts || GFX1250Insts; }
 
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir 
b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
index f4596b0832d97..170478539d8a9 100644
--- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -493,3 +493,57 @@ body: |
 liveins: $vgpr0
 $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
 ...
+
+---
+name: s_setreg_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $sgpr0
+; GCN-LABEL: name: s_setreg_b32_hwreg_mode
+; GCN: liveins: $sgpr0
+; GCN-NEXT: {{  $}}
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_b32_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $sgpr0
+; GCN-LABEL: name: s_setreg_b32_mode
+; GCN: liveins: $sgpr0
+; GCN-NEXT: {{  $}}
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+; GCN-LABEL: name: s_setreg_imm32_b32_hwreg_mode
+; GCN: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+; GCN-LABEL: name: s_setreg_imm32_b32_mode
+; GCN: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit 
$mode
+S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit $mode
+...

_

[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec created 
https://github.com/llvm/llvm-project/pull/153880

This simply updates the pass's cognizance of these instructions, and for the
most part the hazards where they might be encountered do not exist for gfx12.
Nonetheless, encountering them has to be checked for as doing so would indicate
a compiler error.

Co-authored-by: Stephen Thomas 

>From 2f96c402497f80f8d31e4229f03b3ef8dd88cf4d Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Fri, 15 Aug 2025 14:10:55 -0700
Subject: [PATCH] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12
 waitcount instructions

This simply updates the pass's cognizance of these instructions, and for the
most part the hazards where they might be encountered do not exist for gfx12.
Nonetheless, encountering them has to be checked for as doing so would indicate
a compiler error.

Co-authored-by: Stephen Thomas 
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c1cca063aac6f..fa3ca27a5f47c 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1357,6 +1357,16 @@ bool 
GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
 // DsCnt corresponds to LGKMCnt here.
 return (Decoded.DsCnt == 0);
   }
+  case AMDGPU::S_WAIT_STORECNT:
+  case AMDGPU::S_WAIT_STORECNT_DSCNT:
+  case AMDGPU::S_WAIT_LOADCNT:
+  case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+  case AMDGPU::S_WAIT_SAMPLECNT:
+  case AMDGPU::S_WAIT_BVHCNT:
+  case AMDGPU::S_WAIT_DSCNT:
+  case AMDGPU::S_WAIT_EXPCNT:
+  case AMDGPU::S_WAIT_KMCNT:
+llvm_unreachable("unexpected wait count instruction");
   default:
 // SOPP instructions cannot mitigate the hazard.
 if (TII->isSOPP(MI))
@@ -2254,6 +2264,15 @@ int 
GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
 case AMDGPU::S_WAITCNT_EXPCNT:
 case AMDGPU::S_WAITCNT_LGKMCNT:
 case AMDGPU::S_WAIT_IDLE:
+case AMDGPU::S_WAIT_LOADCNT:
+case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+case AMDGPU::S_WAIT_SAMPLECNT:
+case AMDGPU::S_WAIT_BVHCNT:
+case AMDGPU::S_WAIT_STORECNT:
+case AMDGPU::S_WAIT_STORECNT_DSCNT:
+case AMDGPU::S_WAIT_EXPCNT:
+case AMDGPU::S_WAIT_DSCNT:
+case AMDGPU::S_WAIT_KMCNT:
   return true;
 default:
   break;

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (PR #153879)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


rampitec wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack on 
> Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/153879?utm_source=stack-comment-downstack-mergeability-warning
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#153881** https://app.graphite.dev/github/pr/llvm/llvm-project/153881?utm_source=stack-comment-icon
* **#153880** https://app.graphite.dev/github/pr/llvm/llvm-project/153880?utm_source=stack-comment-icon
* **#153879** https://app.graphite.dev/github/pr/llvm/llvm-project/153879?utm_source=stack-comment-icon 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/153879?utm_source=stack-comment-view-in-graphite)
* **#153878** https://app.graphite.dev/github/pr/llvm/llvm-project/153878?utm_source=stack-comment-icon
* `main`




This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn 
more about stacking: https://stacking.dev/?utm_source=stack-comment


https://github.com/llvm/llvm-project/pull/153879
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec created 
https://github.com/llvm/llvm-project/pull/153881

Co-authored-by: Stephen Thomas 

>From 0c71fc2a1f291f245dabec98199295b3edd392e5 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin 
Date: Fri, 15 Aug 2025 14:18:56 -0700
Subject: [PATCH] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI.

Co-authored-by: Stephen Thomas 
---
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index fa3ca27a5f47c..49a681efc79c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1749,7 +1749,7 @@ bool 
GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
 
   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
   TII.get(AMDGPU::S_WAITCNT_DEPCTR))
-  .addImm(0x0fff);
+  .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));
 
   return true;
 }
@@ -1799,7 +1799,7 @@ bool 
GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
 if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
 SIInstrInfo::isEXP(I) ||
 (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- I.getOperand(0).getImm() == 0x0fff))
+ AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
   return HazardExpired;
 
 // Track registers writes

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


rampitec wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack on 
> Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/153880?utm_source=stack-comment-downstack-mergeability-warning
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#153881** https://app.graphite.dev/github/pr/llvm/llvm-project/153881?utm_source=stack-comment-icon
* **#153880** https://app.graphite.dev/github/pr/llvm/llvm-project/153880?utm_source=stack-comment-icon 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/153880?utm_source=stack-comment-view-in-graphite)
* **#153879** https://app.graphite.dev/github/pr/llvm/llvm-project/153879?utm_source=stack-comment-icon
* **#153878** https://app.graphite.dev/github/pr/llvm/llvm-project/153878?utm_source=stack-comment-icon
* `main`




This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn 
more about stacking: https://stacking.dev/?utm_source=stack-comment


https://github.com/llvm/llvm-project/pull/153880
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


rampitec wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack on 
> Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/153881?utm_source=stack-comment-downstack-mergeability-warning
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#153881** https://app.graphite.dev/github/pr/llvm/llvm-project/153881?utm_source=stack-comment-icon 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/153881?utm_source=stack-comment-view-in-graphite)
* **#153880** https://app.graphite.dev/github/pr/llvm/llvm-project/153880?utm_source=stack-comment-icon
* **#153879** https://app.graphite.dev/github/pr/llvm/llvm-project/153879?utm_source=stack-comment-icon
* **#153878** https://app.graphite.dev/github/pr/llvm/llvm-project/153878?utm_source=stack-comment-icon
* `main`




This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn 
more about stacking: https://stacking.dev/?utm_source=stack-comment


https://github.com/llvm/llvm-project/pull/153881
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (PR #153879)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec ready_for_review 
https://github.com/llvm/llvm-project/pull/153879
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (PR #153879)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/153879.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+12) 
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+4) 
- (modified) llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir (+54) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index dd7c1914d3440..c1cca063aac6f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1206,6 +1206,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
 fixDsAtomicAsyncBarrierArriveB64(MI);
   if (ST.hasScratchBaseForwardingHazard())
 fixScratchBaseForwardingHazard(MI);
+  if (ST.setRegModeNeedsVNOPs())
+fixSetRegMode(MI);
 }
 
 static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo 
&TRI,
@@ -3546,3 +3548,13 @@ bool 
GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) {
   AMDGPU::DepCtr::encodeFieldSaSdst(0), 0));
   return true;
 }
+
+bool GCNHazardRecognizer::fixSetRegMode(MachineInstr *MI) {
+  if (!isSSetReg(MI->getOpcode()) ||
+  MI->getOperand(1).getImm() != AMDGPU::Hwreg::ID_MODE)
+return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index e0982b46424b9..67beffadc0913 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -113,6 +113,7 @@ class GCNHazardRecognizer final : public 
ScheduleHazardRecognizer {
   bool fixGetRegWaitIdle(MachineInstr *MI);
   bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
   bool fixScratchBaseForwardingHazard(MachineInstr *MI);
+  bool fixSetRegMode(MachineInstr *MI);
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h 
b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 404a476a3076a..2a8385df3f934 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1345,6 +1345,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
 
+  bool setRegModeNeedsVNOPs() const {
+return GFX1250Insts && getGeneration() == GFX12;
+  }
+
   /// Return if operations acting on VGPR tuples require even alignment.
   bool needsAlignedVGPRs() const { return GFX90AInsts || GFX1250Insts; }
 
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir 
b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
index f4596b0832d97..170478539d8a9 100644
--- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -493,3 +493,57 @@ body: |
 liveins: $vgpr0
 $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
 ...
+
+---
+name: s_setreg_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $sgpr0
+; GCN-LABEL: name: s_setreg_b32_hwreg_mode
+; GCN: liveins: $sgpr0
+; GCN-NEXT: {{  $}}
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_b32_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+liveins: $sgpr0
+; GCN-LABEL: name: s_setreg_b32_mode
+; GCN: liveins: $sgpr0
+; GCN-NEXT: {{  $}}
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+; GCN-LABEL: name: s_setreg_imm32_b32_hwreg_mode
+; GCN: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_mode
+tracksRegLiveness: true
+body: |
+  bb.0:
+; GCN-LABEL: name: s_setreg_imm32_b32_mode
+; GCN: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit 
$mode
+S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit $mode
+...

``




https://github.com/llvm/llvm-project/pull/153879
___
llvm-branch-commits mailing list

[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec ready_for_review 
https://github.com/llvm/llvm-project/pull/153880
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)


Changes

This simply updates the pass's cognizance of these instructions, and for the
most part the hazards where they might be encountered do not exist for gfx12.
Nonetheless, encountering them has to be checked for as doing so would indicate
a compiler error.

Co-authored-by: Stephen Thomas 

---
Full diff: https://github.com/llvm/llvm-project/pull/153880.diff


1 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+19) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c1cca063aac6f..fa3ca27a5f47c 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1357,6 +1357,16 @@ bool 
GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
 // DsCnt corresponds to LGKMCnt here.
 return (Decoded.DsCnt == 0);
   }
+  case AMDGPU::S_WAIT_STORECNT:
+  case AMDGPU::S_WAIT_STORECNT_DSCNT:
+  case AMDGPU::S_WAIT_LOADCNT:
+  case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+  case AMDGPU::S_WAIT_SAMPLECNT:
+  case AMDGPU::S_WAIT_BVHCNT:
+  case AMDGPU::S_WAIT_DSCNT:
+  case AMDGPU::S_WAIT_EXPCNT:
+  case AMDGPU::S_WAIT_KMCNT:
+llvm_unreachable("unexpected wait count instruction");
   default:
 // SOPP instructions cannot mitigate the hazard.
 if (TII->isSOPP(MI))
@@ -2254,6 +2264,15 @@ int 
GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
 case AMDGPU::S_WAITCNT_EXPCNT:
 case AMDGPU::S_WAITCNT_LGKMCNT:
 case AMDGPU::S_WAIT_IDLE:
+case AMDGPU::S_WAIT_LOADCNT:
+case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+case AMDGPU::S_WAIT_SAMPLECNT:
+case AMDGPU::S_WAIT_BVHCNT:
+case AMDGPU::S_WAIT_STORECNT:
+case AMDGPU::S_WAIT_STORECNT_DSCNT:
+case AMDGPU::S_WAIT_EXPCNT:
+case AMDGPU::S_WAIT_DSCNT:
+case AMDGPU::S_WAIT_KMCNT:
   return true;
 default:
   break;

``




https://github.com/llvm/llvm-project/pull/153880
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)

2025-08-15 Thread Stanislav Mekhanoshin via llvm-branch-commits


https://github.com/rampitec ready_for_review 
https://github.com/llvm/llvm-project/pull/153881
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)


Changes

Co-authored-by: Stephen Thomas 

---
Full diff: https://github.com/llvm/llvm-project/pull/153881.diff


1 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+2-2) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index fa3ca27a5f47c..49a681efc79c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1749,7 +1749,7 @@ bool 
GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
 
   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
   TII.get(AMDGPU::S_WAITCNT_DEPCTR))
-  .addImm(0x0fff);
+  .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));
 
   return true;
 }
@@ -1799,7 +1799,7 @@ bool 
GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
 if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
 SIInstrInfo::isEXP(I) ||
 (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- I.getOperand(0).getImm() == 0x0fff))
+ AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
   return HazardExpired;
 
 // Track registers writes

``




https://github.com/llvm/llvm-project/pull/153881
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)

2025-08-15 Thread Peter Collingbourne via llvm-branch-commits


https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/153873


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)

2025-08-15 Thread Peter Collingbourne via llvm-branch-commits


https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/153873


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [Github] Remove call to llvm-project-tests.yml from mlir-spirv-tests.yml (PR #153871)

2025-08-15 Thread Aiden Grossman via llvm-branch-commits


https://github.com/boomanaiden154 created 
https://github.com/llvm/llvm-project/pull/153871

This will eventually allow for removing llvm-project-tests.yml. This
should significantly reduce the complexity of this workflow (including
the complexity of llvm-project-tests.yml) at the cost of a little bit of
duplication.



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [Github] Remove call to llvm-project-tests.yml from mlir-spirv-tests.yml (PR #153871)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-github-workflow

Author: Aiden Grossman (boomanaiden154)


Changes

This will eventually allow for removing llvm-project-tests.yml. This
should significantly reduce the complexity of this workflow (including
the complexity of llvm-project-tests.yml) at the cost of a little bit of
duplication.


---
Full diff: https://github.com/llvm/llvm-project/pull/153871.diff


1 Files Affected:

- (modified) .github/workflows/mlir-spirv-tests.yml (+25-6) 


``diff
diff --git a/.github/workflows/mlir-spirv-tests.yml 
b/.github/workflows/mlir-spirv-tests.yml
index 48b6c69a61f50..658858feb8814 100644
--- a/.github/workflows/mlir-spirv-tests.yml
+++ b/.github/workflows/mlir-spirv-tests.yml
@@ -24,9 +24,28 @@ jobs:
   check_spirv:
 if: github.repository_owner == 'llvm'
 name: Test MLIR SPIR-V
-uses: ./.github/workflows/llvm-project-tests.yml
-with:
-  build_target: check-mlir
-  projects: mlir
-  extra_cmake_args: '-DLLVM_TARGETS_TO_BUILD="host" 
-DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON'
-  os_list: '["ubuntu-24.04"]'
+runs-on: ubuntu-24.04
+container:
+  image: ghcr.io/llvm/ci-ubuntu-24.04:latest
+steps:
+  - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 
v5.0.0
+  - name: Setup ccache
+uses: 
hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17
+with:
+  max-size: 2G
+  key: spirv-mlir-ubuntu-24.04
+  variant: sccache
+  - name: Build and Test
+run: |
+  mkdir build
+  cmake -GNinja \
+-S llvm \
+-B build \
+-DCMAKE_BUILD_TYPE=Release \
+-DLLVM_ENABLE_ASSERTIONS=ON \
+-DCMAKE_C_COMPILER_LAUNCHER=sccache \
+-DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
+-DLLVM_TARGETS_TO_BUILD="host" \
+-DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON \
+-DLLVM_TARGETS_TO_BUILD=mlir
+  ninja -C build check-mlir

``




https://github.com/llvm/llvm-project/pull/153871
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Github] Remove call to llvm-project-tests.yml from mlir-spirv-tests.yml (PR #153871)

2025-08-15 Thread Aiden Grossman via llvm-branch-commits


https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/153871

>From a61dece065902c60c9ea0f80ed133c4ad92c549f Mon Sep 17 00:00:00 2001
From: Aiden Grossman 
Date: Fri, 15 Aug 2025 20:31:54 +
Subject: [PATCH] fix

Created using spr 1.3.6
---
 .github/workflows/mlir-spirv-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/mlir-spirv-tests.yml 
b/.github/workflows/mlir-spirv-tests.yml
index 658858feb8814..dfd3374dcc799 100644
--- a/.github/workflows/mlir-spirv-tests.yml
+++ b/.github/workflows/mlir-spirv-tests.yml
@@ -47,5 +47,5 @@ jobs:
 -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
 -DLLVM_TARGETS_TO_BUILD="host" \
 -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON \
--DLLVM_TARGETS_TO_BUILD=mlir
+-LLVM_ENABLE_PROJECTS=mlir
   ninja -C build check-mlir

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Github] Remove call to llvm-project-tests.yml from mlir-spirv-tests.yml (PR #153871)

2025-08-15 Thread Aiden Grossman via llvm-branch-commits


https://github.com/boomanaiden154 updated 
https://github.com/llvm/llvm-project/pull/153871

>From a61dece065902c60c9ea0f80ed133c4ad92c549f Mon Sep 17 00:00:00 2001
From: Aiden Grossman 
Date: Fri, 15 Aug 2025 20:31:54 +
Subject: [PATCH 1/2] fix

Created using spr 1.3.6
---
 .github/workflows/mlir-spirv-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/mlir-spirv-tests.yml 
b/.github/workflows/mlir-spirv-tests.yml
index 658858feb8814..dfd3374dcc799 100644
--- a/.github/workflows/mlir-spirv-tests.yml
+++ b/.github/workflows/mlir-spirv-tests.yml
@@ -47,5 +47,5 @@ jobs:
 -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
 -DLLVM_TARGETS_TO_BUILD="host" \
 -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON \
--DLLVM_TARGETS_TO_BUILD=mlir
+-LLVM_ENABLE_PROJECTS=mlir
   ninja -C build check-mlir

>From 41b41b8785f61974fa132f31e9c6a8317d5575ee Mon Sep 17 00:00:00 2001
From: Aiden Grossman 
Date: Fri, 15 Aug 2025 20:34:19 +
Subject: [PATCH 2/2] fix

Created using spr 1.3.6
---
 .github/workflows/mlir-spirv-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/mlir-spirv-tests.yml 
b/.github/workflows/mlir-spirv-tests.yml
index dfd3374dcc799..78952ccad2642 100644
--- a/.github/workflows/mlir-spirv-tests.yml
+++ b/.github/workflows/mlir-spirv-tests.yml
@@ -47,5 +47,5 @@ jobs:
 -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
 -DLLVM_TARGETS_TO_BUILD="host" \
 -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON \
--LLVM_ENABLE_PROJECTS=mlir
+-DLLVM_ENABLE_PROJECTS=mlir
   ninja -C build check-mlir

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Peter Collingbourne (pcc)


Changes

With gcc 15 we end up emitting a reference to the
std::__glibcxx_assert_fail function because of this change:
https://github.com/gcc-mirror/gcc/commit/361d230fd7800a7e749aba8ed020f54f5c26d504
combined with assertion checks in the std::atomic implementation.

This reference is undefined with dfsan causing the test to fail. Fix it
by defining the macro that disables assertions.


---
Full diff: https://github.com/llvm/llvm-project/pull/153873.diff


1 Files Affected:

- (modified) compiler-rt/test/dfsan/atomic.cpp (+5-2) 


``diff
diff --git a/compiler-rt/test/dfsan/atomic.cpp 
b/compiler-rt/test/dfsan/atomic.cpp
index 22ee323c752f8..73e1cbd17a7cd 100644
--- a/compiler-rt/test/dfsan/atomic.cpp
+++ b/compiler-rt/test/dfsan/atomic.cpp
@@ -1,9 +1,12 @@
-// RUN: %clangxx_dfsan %s -fno-exceptions -o %t && %run %t
-// RUN: %clangxx_dfsan -DORIGIN_TRACKING -mllvm -dfsan-track-origins=1 %s 
-fno-exceptions -o %t && %run %t
+// RUN: %clangxx_dfsan %s -fno-exceptions -D_GLIBCXX_NO_ASSERTIONS -o %t && 
%run %t
+// RUN: %clangxx_dfsan -DORIGIN_TRACKING -mllvm -dfsan-track-origins=1 %s 
-fno-exceptions -D_GLIBCXX_NO_ASSERTIONS -o %t && %run %t
 //
 // Use -fno-exceptions to turn off exceptions to avoid instrumenting
 // __cxa_begin_catch, std::terminate and __gxx_personality_v0.
 //
+// Use -D_GLIBCXX_NO_ASSERTIONS to avoid depending on
+// std::__glibcxx_assert_fail with gcc >= 15.
+//
 // TODO: Support builtin atomics. For example, 
https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
 // DFSan instrumentation pass cannot identify builtin callsites yet.
 

``




https://github.com/llvm/llvm-project/pull/153873
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)

2025-08-15 Thread Peter Collingbourne via llvm-branch-commits


https://github.com/pcc created https://github.com/llvm/llvm-project/pull/153873

With gcc 15 we end up emitting a reference to the
std::__glibcxx_assert_fail function because of this change:
https://github.com/gcc-mirror/gcc/commit/361d230fd7800a7e749aba8ed020f54f5c26d504
combined with assertion checks in the std::atomic implementation.

This reference is undefined with dfsan causing the test to fail. Fix it
by defining the macro that disables assertions.



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [mlir] [OpenMP][OMPIRBuilder] Use device shared memory for arg structures (PR #150925)

2025-08-15 Thread Sergio Afonso via llvm-branch-commits


skatrak wrote:

I just pushed a commit replacing callbacks with `CodeExtractor` subclasses. I 
also moved some of the logic to create a `CodeExtractor` from an `OutlineInfo` 
so that hopefully using these custom `CodeExtractor`s doesn't turn out to be too 
much of a hassle. Let me know if this works for you @Meinersbur, @bhandarkar-pranav.

Next week I'll try to work on obtaining and using a proper deallocation block, 
so that we don't have to override it and get everything in better shape for 
merging. That'll be an additional PR added to the stack.

https://github.com/llvm/llvm-project/pull/150925
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [DirectX] Add `split-section` to `llvm-objcopy` and implement it for `DXContainer` (PR #153265)

2025-08-15 Thread Finn Plummer via llvm-branch-commits

inbelic wrote:

> Rather than a new option, I'd rather you just add support for 
> `--only-section` and `--dump-section`, which can then be specified in 
> conjunction with each other to achieve the same result. IIRC, a while back, 
> `--split-dwo` was considered a mistake, because the same effect could be 
> achieved by specifying `--extract-dwo` and `--strip-dwo` and `--split-dwo` 
> doesn't exist in GNU objcopy (which we aim for broad compatibility with).

I assume you mean `--remove-section` and `--dump-section`. The issue for our 
use-case is that `--dump-section` only outputs the section contents, it does 
not output a valid object with just one section specified.

It could be that such an option isn't really applicable to other object 
formats, and we could go about this as a `DXContainer` specific option.

We thought it best not to have a different implementation of `dump-section` for 
`DXContainer` that outputs more than just the contents.

https://github.com/llvm/llvm-project/pull/153265
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)

2025-08-15 Thread Florian Mayer via llvm-branch-commits


https://github.com/fmayer approved this pull request.


https://github.com/llvm/llvm-project/pull/153873
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [Github] Remove call to llvm-project-tests from libclang tests (PR #153876)

2025-08-15 Thread Aiden Grossman via llvm-branch-commits


https://github.com/boomanaiden154 created 
https://github.com/llvm/llvm-project/pull/153876

This allows for removing llvm-project-tests.yml. This significantly
reduces the complexity of this workflow (including the complexity of
llvm-project-tests.yml) at the cost of a little bit of duplication with
the other workflows that were also using llvm-project-tests.yml.



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)

2025-08-15 Thread Peter Collingbourne via llvm-branch-commits


pcc wrote:

Landed manually

https://github.com/llvm/llvm-project/pull/153873
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)

2025-08-15 Thread Peter Collingbourne via llvm-branch-commits


https://github.com/pcc closed https://github.com/llvm/llvm-project/pull/153873
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] 3c37d74 - Revert "[flang] Lower EOSHIFT into hlfir.eoshift. (#153106)"

2025-08-15 Thread via llvm-branch-commits


Author: Slava Zakharin
Date: 2025-08-15T17:38:13-07:00
New Revision: 3c37d74984d24b503c05e338f95c5cb1c2bcb57e

URL: 
https://github.com/llvm/llvm-project/commit/3c37d74984d24b503c05e338f95c5cb1c2bcb57e
DIFF: 
https://github.com/llvm/llvm-project/commit/3c37d74984d24b503c05e338f95c5cb1c2bcb57e.diff

LOG: Revert "[flang] Lower EOSHIFT into hlfir.eoshift. (#153106)"

This reverts commit 25285b3476292fea239fdab945ca39d156c782d5.

Added: 


Modified: 
flang/lib/Lower/HlfirIntrinsics.cpp

Removed: 
flang/test/Lower/HLFIR/eoshift.f90



diff  --git a/flang/lib/Lower/HlfirIntrinsics.cpp 
b/flang/lib/Lower/HlfirIntrinsics.cpp
index 3b0f2e35cd5b5..6e1d06a25924b 100644
--- a/flang/lib/Lower/HlfirIntrinsics.cpp
+++ b/flang/lib/Lower/HlfirIntrinsics.cpp
@@ -170,17 +170,6 @@ class HlfirCShiftLowering : public 
HlfirTransformationalIntrinsic {
 mlir::Type stmtResultType) override;
 };
 
-class HlfirEOShiftLowering : public HlfirTransformationalIntrinsic {
-public:
-  using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic;
-
-protected:
-  mlir::Value
-  lowerImpl(const Fortran::lower::PreparedActualArguments &loweredActuals,
-const fir::IntrinsicArgumentLoweringRules *argLowering,
-mlir::Type stmtResultType) override;
-};
-
 class HlfirReshapeLowering : public HlfirTransformationalIntrinsic {
 public:
   using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic;
@@ -441,33 +430,6 @@ mlir::Value HlfirCShiftLowering::lowerImpl(
   return createOp(resultType, operands);
 }
 
-mlir::Value HlfirEOShiftLowering::lowerImpl(
-const Fortran::lower::PreparedActualArguments &loweredActuals,
-const fir::IntrinsicArgumentLoweringRules *argLowering,
-mlir::Type stmtResultType) {
-  auto operands = getOperandVector(loweredActuals, argLowering);
-  assert(operands.size() == 4);
-  mlir::Value array = operands[0];
-  mlir::Value shift = operands[1];
-  mlir::Value boundary = operands[2];
-  mlir::Value dim = operands[3];
-  // If DIM is present, then dereference it if it is a ref.
-  if (dim)
-dim = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{dim});
-
-  mlir::Type resultType = computeResultType(array, stmtResultType);
-
-  // Scalar logical constant boundary might be represented using i1, i2, ...
-  // type. We need to cast it to fir.logical type of the ARRAY/result.
-  if (auto logicalTy = mlir::dyn_cast(
-  hlfir::getFortranElementType(resultType)))
-if (boundary && fir::isa_trivial(boundary.getType()) &&
-boundary.getType() != logicalTy)
-  boundary = builder.createConvert(loc, logicalTy, boundary);
-
-  return createOp(resultType, array, shift, boundary, dim);
-}
-
 mlir::Value HlfirReshapeLowering::lowerImpl(
 const Fortran::lower::PreparedActualArguments &loweredActuals,
 const fir::IntrinsicArgumentLoweringRules *argLowering,
@@ -527,9 +489,6 @@ std::optional 
Fortran::lower::lowerHlfirIntrinsic(
   if (name == "cshift")
 return HlfirCShiftLowering{builder, loc}.lower(loweredActuals, argLowering,
stmtResultType);
-  if (name == "eoshift")
-return HlfirEOShiftLowering{builder, loc}.lower(loweredActuals, 
argLowering,
-stmtResultType);
   if (name == "reshape")
 return HlfirReshapeLowering{builder, loc}.lower(loweredActuals, 
argLowering,
 stmtResultType);

diff  --git a/flang/test/Lower/HLFIR/eoshift.f90 
b/flang/test/Lower/HLFIR/eoshift.f90
deleted file mode 100644
index 3b2570ab59365..0
--- a/flang/test/Lower/HLFIR/eoshift.f90
+++ /dev/null
@@ -1,259 +0,0 @@
-! Test lowering of EOSHIFT intrinsic to HLFIR
-! RUN: bbc -emit-hlfir -o - -I nowhere %s 2>&1 | FileCheck %s
-
-module eoshift_types
-  type t
-  end type t
-end module eoshift_types
-
-! 1d shift by scalar
-subroutine eoshift1(a, s)
-  integer :: a(:), s
-  a = EOSHIFT(a, 2)
-end subroutine
-! CHECK-LABEL:   func.func @_QPeoshift1(
-! CHECK-SAME:  %[[VAL_0:.*]]: 
!fir.box> {fir.bindc_name = "a"},
-! CHECK-SAME:  %[[VAL_1:.*]]: !fir.ref 
{fir.bindc_name = "s"}) {
-! CHECK:   %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
-! CHECK:   %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]]
-! CHECK:   %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]]
-! CHECK:   %[[VAL_5:.*]] = arith.constant 2 : i32
-! CHECK:   %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] : 
(!fir.box>, i32) -> !hlfir.expr
-! CHECK:   hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 : 
!hlfir.expr, !fir.box>
-! CHECK:   hlfir.destroy %[[VAL_6]] : !hlfir.expr
-! CHECK:   return
-! CHECK: }
-
-! 1d shift by scalar with dim
-subroutine eoshift2(a, s)
-  integer :: a(:), s
-  a = EOSHIFT(a, 2, dim=1)
-end subroutine
-! CHECK-LABEL:

[llvm-branch-commits] [llvm] [RISCV] Track Linker Relaxable through Assembly Relaxation (PR #153670)

2025-08-15 Thread Fangrui Song via llvm-branch-commits


https://github.com/MaskRay approved this pull request.


https://github.com/llvm/llvm-project/pull/153670
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [Github] Remove call to llvm-project-tests from libclang tests (PR #153876)

2025-08-15 Thread Jannick Kremer via llvm-branch-commits


https://github.com/DeinAlptraum edited 
https://github.com/llvm/llvm-project/pull/153876
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [Github] Remove call to llvm-project-tests from libclang tests (PR #153876)

2025-08-15 Thread Jannick Kremer via llvm-branch-commits


https://github.com/DeinAlptraum commented:

Sounds good. A few comments:

Please also remove the `.github/workflows/llvm-project-tests.yml` entry under
`on.push.paths` & `on.pull_request.paths` in the workflow file.

https://github.com/llvm/llvm-project/pull/153876
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [Github] Remove call to llvm-project-tests from libclang tests (PR #153876)

2025-08-15 Thread Jannick Kremer via llvm-branch-commits



@@ -25,17 +25,36 @@ on:
 jobs:
   check-clang-python:
 # Build libclang and then run the libclang Python binding's unit tests.
+# There is an issue running on "windows-2019".
+# See 
https://github.com/llvm/llvm-project/issues/76601#issuecomment-1873049082.
 name: Build and run Python unit tests
 if: github.repository == 'llvm/llvm-project'
+runs-on: ubuntu-24.04
 strategy:
   fail-fast: false
   matrix:
 python-version: ["3.8", "3.13"]
-uses: ./.github/workflows/llvm-project-tests.yml
-with:
-  build_target: check-clang-python
-  projects: clang
-  # There is an issue running on "windows-2019".
-  # See 
https://github.com/llvm/llvm-project/issues/76601#issuecomment-1873049082.
-  os_list: '["ubuntu-24.04"]'
-  python_version: ${{ matrix.python-version }}
+steps:
+  - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 
v5.0.0
+  - name: Setup Python
+uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # 
v5.4.0
+with:
+  python-version: ${{ matrix.python_version }}

DeinAlptraum wrote:

Something doesn't seem to be working here: the action raises several warnings, 
and the version used for the tests at the end was the pre-installed 3.12 (see 
e.g. the "Found Python3" line at the start of the "Build and Test" step, or 
the Python call towards the end of the step).
A working run for comparison: 
https://github.com/llvm/llvm-project/actions/runs/16983718164/job/48198262227?pr=153746

https://github.com/llvm/llvm-project/pull/153876
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] release/21.x: [clang][LoongArch] Ensure `target("lasx")` implies LSX support (#153542) (PR #153739)

2025-08-15 Thread Lu Weining via llvm-branch-commits


https://github.com/SixWeining approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/153739
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LoongArch] Reduce number of reserved relocations when relax enabled (PR #153769)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:



@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/153769.diff


2 Files Affected:

- (modified) llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp 
(+9-18) 
- (modified) llvm/test/MC/LoongArch/Relocations/relax-attr.s (+4-3) 


```diff
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp 
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index ca5d27d54bb81..c6eda4fd63a6b 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -143,8 +143,6 @@ static void fixupLeb128(MCContext &Ctx, const MCFixup 
&Fixup, uint8_t *Data,
 void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
  const MCValue &Target, uint8_t *Data,
  uint64_t Value, bool IsResolved) {
-  if (IsResolved && shouldForceRelocation(Fixup, Target))
-IsResolved = false;
   IsResolved = addReloc(F, Fixup, Target, Value, IsResolved);
   if (!Value)
 return; // Doesn't change encoding.
@@ -176,20 +174,6 @@ void LoongArchAsmBackend::applyFixup(const MCFragment &F, 
const MCFixup &Fixup,
   }
 }
 
-bool LoongArchAsmBackend::shouldForceRelocation(const MCFixup &Fixup,
-const MCValue &Target) {
-  switch (Fixup.getKind()) {
-  default:
-return STI.hasFeature(LoongArch::FeatureRelax);
-  case FK_Data_1:
-  case FK_Data_2:
-  case FK_Data_4:
-  case FK_Data_8:
-  case FK_Data_leb128:
-return !Target.isAbsolute();
-  }
-}
-
 static inline std::pair
 getRelocPairForSize(unsigned Size) {
   switch (Size) {
@@ -484,9 +468,16 @@ bool LoongArchAsmBackend::addReloc(const MCFragment &F, 
const MCFixup &Fixup,
 return false;
   }
 
-  IsResolved = Fallback();
   // If linker relaxation is enabled and supported by the current relocation,
-  // append a RELAX relocation.
+  // generate a relocation and then append a RELAX.
+  if (Fixup.isLinkerRelaxable())
+IsResolved = false;
+  if (IsResolved && Fixup.isPCRel())
+IsResolved = isPCRelFixupResolved(Target.getAddSym(), F);
+
+  if (!IsResolved)
+Asm->getWriter().recordRelocation(F, Fixup, Target, FixedValue);
+
   if (Fixup.isLinkerRelaxable()) {
 auto FA = MCFixup::create(Fixup.getOffset(), nullptr, ELF::R_LARCH_RELAX);
 Asm->getWriter().recordRelocation(F, FA, MCValue::get(nullptr),
diff --git a/llvm/test/MC/LoongArch/Relocations/relax-attr.s 
b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
index e3e9038b755d3..d94d32ebd7ab0 100644
--- a/llvm/test/MC/LoongArch/Relocations/relax-attr.s
+++ b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
@@ -17,11 +17,8 @@
 # CHECKR-NEXT: 0x4 R_LARCH_CALL36 foo 0x0
 # CHECKR-NEXT: 0x4 R_LARCH_RELAX - 0x0
 # CHECKR-NEXT: 0x10 R_LARCH_B21 .L0 0x0
-# CHECKR-NEXT: 0x14 R_LARCH_B21 .L1 0x0
 # CHECKR-NEXT: 0x18 R_LARCH_B16 .L0 0x0
-# CHECKR-NEXT: 0x1C R_LARCH_B16 .L1 0x0
 # CHECKR-NEXT: 0x20 R_LARCH_B26 .L0 0x0
-# CHECKR-NEXT: 0x24 R_LARCH_B26 .L1 0x0
 # CHECKR-NEXT:   }
 # CHECKR-NEXT:   Section ({{.*}}) .rela.data {
 # CHECKR-NEXT: 0x0 R_LARCH_64 .L1 0x0
@@ -36,6 +33,10 @@
 
 .L1:
   nop
+## Relocations for branches to .L0 must be reserved and be fixed up by linker
+## when linker relaxation enabled, because of the relaxable call36 instruction.
+## Branches to .L1 can be resolved correctly at compile time, so their
+## relocations can simply be removed.
   bnez $a0, .L0
   beqz $a0, .L1
   beq  $a0, $a1, .L0

```




https://github.com/llvm/llvm-project/pull/153769
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [LoongArch] Reduce number of reserved relocations when relax enabled (PR #153769)

2025-08-15 Thread via llvm-branch-commits


https://github.com/zhaoqi5 created 
https://github.com/llvm/llvm-project/pull/153769

None

>From f67324528d93ca3e908f39e8e89caef5ecc3e11f Mon Sep 17 00:00:00 2001
From: Qi Zhao 
Date: Fri, 15 Aug 2025 17:12:33 +0800
Subject: [PATCH] [LoongArch] Reduce number of reserved relocations when relax
 enabled

---
 .../MCTargetDesc/LoongArchAsmBackend.cpp  | 27 +++
 .../MC/LoongArch/Relocations/relax-attr.s |  7 ++---
 2 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp 
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index ca5d27d54bb81..c6eda4fd63a6b 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -143,8 +143,6 @@ static void fixupLeb128(MCContext &Ctx, const MCFixup 
&Fixup, uint8_t *Data,
 void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
  const MCValue &Target, uint8_t *Data,
  uint64_t Value, bool IsResolved) {
-  if (IsResolved && shouldForceRelocation(Fixup, Target))
-IsResolved = false;
   IsResolved = addReloc(F, Fixup, Target, Value, IsResolved);
   if (!Value)
 return; // Doesn't change encoding.
@@ -176,20 +174,6 @@ void LoongArchAsmBackend::applyFixup(const MCFragment &F, 
const MCFixup &Fixup,
   }
 }
 
-bool LoongArchAsmBackend::shouldForceRelocation(const MCFixup &Fixup,
-const MCValue &Target) {
-  switch (Fixup.getKind()) {
-  default:
-return STI.hasFeature(LoongArch::FeatureRelax);
-  case FK_Data_1:
-  case FK_Data_2:
-  case FK_Data_4:
-  case FK_Data_8:
-  case FK_Data_leb128:
-return !Target.isAbsolute();
-  }
-}
-
 static inline std::pair
 getRelocPairForSize(unsigned Size) {
   switch (Size) {
@@ -484,9 +468,16 @@ bool LoongArchAsmBackend::addReloc(const MCFragment &F, 
const MCFixup &Fixup,
 return false;
   }
 
-  IsResolved = Fallback();
   // If linker relaxation is enabled and supported by the current relocation,
-  // append a RELAX relocation.
+  // generate a relocation and then append a RELAX.
+  if (Fixup.isLinkerRelaxable())
+IsResolved = false;
+  if (IsResolved && Fixup.isPCRel())
+IsResolved = isPCRelFixupResolved(Target.getAddSym(), F);
+
+  if (!IsResolved)
+Asm->getWriter().recordRelocation(F, Fixup, Target, FixedValue);
+
   if (Fixup.isLinkerRelaxable()) {
 auto FA = MCFixup::create(Fixup.getOffset(), nullptr, ELF::R_LARCH_RELAX);
 Asm->getWriter().recordRelocation(F, FA, MCValue::get(nullptr),
diff --git a/llvm/test/MC/LoongArch/Relocations/relax-attr.s 
b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
index e3e9038b755d3..d94d32ebd7ab0 100644
--- a/llvm/test/MC/LoongArch/Relocations/relax-attr.s
+++ b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
@@ -17,11 +17,8 @@
 # CHECKR-NEXT: 0x4 R_LARCH_CALL36 foo 0x0
 # CHECKR-NEXT: 0x4 R_LARCH_RELAX - 0x0
 # CHECKR-NEXT: 0x10 R_LARCH_B21 .L0 0x0
-# CHECKR-NEXT: 0x14 R_LARCH_B21 .L1 0x0
 # CHECKR-NEXT: 0x18 R_LARCH_B16 .L0 0x0
-# CHECKR-NEXT: 0x1C R_LARCH_B16 .L1 0x0
 # CHECKR-NEXT: 0x20 R_LARCH_B26 .L0 0x0
-# CHECKR-NEXT: 0x24 R_LARCH_B26 .L1 0x0
 # CHECKR-NEXT:   }
 # CHECKR-NEXT:   Section ({{.*}}) .rela.data {
 # CHECKR-NEXT: 0x0 R_LARCH_64 .L1 0x0
@@ -36,6 +33,10 @@
 
 .L1:
   nop
+## Relocations for branches to .L0 must be reserved and be fixed up by linker
+## when linker relaxation enabled, because of the relaxable call36 instruction.
+## Branches to .L1 can be resolved correctly at compile time, so their
+## relocations can simply be removed.
   bnez $a0, .L0
   beqz $a0, .L1
   beq  $a0, $a1, .L0

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AArch64][ISel] Select constructive EXT_ZZI pseudo instruction (PR #152554)

2025-08-15 Thread Paul Walker via llvm-branch-commits


https://github.com/paulwalker-arm approved this pull request.


https://github.com/llvm/llvm-project/pull/152554
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [DirectX] Add `split-section` to `llvm-objcopy` and implement it for `DXContainer` (PR #153265)

2025-08-15 Thread James Henderson via llvm-branch-commits

jh7370 wrote:

> @inbelic can confirm, but I thought the intent was for this to work like 
> `--split-dwo` where it also removes the section from the original object.
> 
> We have a few common tooling cases for DX files where the compiler generates 
> an object with a bunch of sections that you may strip out later (debug info, 
> runtime reflection, runtime signatures). What we'd like to have is a tooling 
> path where we can do that with a single invocation of objcopy something like:
> 
> ```
> llvm-objcopy  --split-section=RTS0= 
> --split-section=STAT= --split-section=ILDB= new debug info>
> ```
> 
> I suggested making this a general option since it could be implemented for 
> any object file type, but if you think it is better to do it as a DX-specific 
> option we could do that as well.

Instead of adding a new option, I'd rather you just add support for 
`--only-section` and `--dump-section`, which can then be specified in 
conjunction with each other to achieve the same result. IIRC, a while back, 
`--split-dwo` was considered a mistake, because the same effect could be 
achieved by specifying `--extract-dwo` and `--strip-dwo`, and because 
`--split-dwo` doesn't exist in GNU objcopy (which we aim for broad 
compatibility with).

https://github.com/llvm/llvm-project/pull/153265
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [DirectX] Add `split-section` to `llvm-objcopy` and implement it for `DXContainer` (PR #153265)

2025-08-15 Thread James Henderson via llvm-branch-commits


https://github.com/jh7370 requested changes to this pull request.

Requesting changes to prevent this going in without my review etc.

https://github.com/llvm/llvm-project/pull/153265
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [DirectX] Add support for `remove-section` of `DXContainer` for `llvm-objcopy` (PR #153246)

2025-08-15 Thread James Henderson via llvm-branch-commits



@@ -12,18 +12,18 @@ Header:
   Version:
 Major:   1
 Minor:   0
-## FileSize = 1996 - 8 (FKE0 content) - 1688 (FKE4 content)
+## FileSize = 1996 - 8 (FKE1 content) - 1688 (FKE4 content)
 ##  - 8 (2 part offsets) - 16 (2 part headers)
 ##  = 276
-## CHECK: FileSize:   276
+# CHECK: FileSize:   276
   FileSize:1996
-## CHECK: PartCount:  5
+# CHECK: PartCount:  5

jh7370 wrote:

Can any of these be `CHECK-NEXT:`?

https://github.com/llvm/llvm-project/pull/153246
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [DirectX] Add support for `remove-section` of `DXContainer` for `llvm-objcopy` (PR #153246)

2025-08-15 Thread James Henderson via llvm-branch-commits



@@ -25,13 +25,13 @@ Header:
 Minor:   0
 ## FileSize = 1984 - 24 (RTS0 content) - 4 (1 part offset) - 8 (1 part header)
 ##  = 1948
-## CHECK: FileSize:   1948
+# CHECK: FileSize:   1948
   FileSize:1984
-## CHECK: PartCount:  6
+# CHECK: PartCount:  6
   PartCount:   7
   PartOffsets: [ 60, 1792, 1808, 1836, 1852, 1868, 1900 ]
 Parts:
-## CHECK-NOT: RTS0
+# CHECK-NOT: RTS0

jh7370 wrote:

Same comment as above re checking for "Parts:"

https://github.com/llvm/llvm-project/pull/153246
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [DirectX] Add support for `remove-section` of `DXContainer` for `llvm-objcopy` (PR #153246)

2025-08-15 Thread James Henderson via llvm-branch-commits



@@ -12,18 +12,18 @@ Header:
   Version:
 Major:   1
 Minor:   0
-## FileSize = 1996 - 8 (FKE0 content) - 1688 (FKE4 content)
+## FileSize = 1996 - 8 (FKE1 content) - 1688 (FKE4 content)
 ##  - 8 (2 part offsets) - 16 (2 part headers)
 ##  = 276
-## CHECK: FileSize:   276
+# CHECK: FileSize:   276
   FileSize:1996
-## CHECK: PartCount:  5
+# CHECK: PartCount:  5
   PartCount:   7
-## CHECK: PartOffsets:[ 52, 68, 84, 212, 240 ]
+# CHECK: PartOffsets:[ 52, 68, 84, 212, 240 ]
   PartOffsets: [ 60, 76, 92, 108, 236, 1932, 1960 ]
 Parts:
-## CHECK-NOT: FKE1
-## CHECK-NOT: FKE4
+# CHECK-NOT: FKE1
+# CHECK-NOT: FKE4

jh7370 wrote:

Are you aware that this just means FKE1 and FKE4 can't appear from this point 
onwards? I'd be tempted to put a `# CHECK: Parts:` line before them, since 
technically the obj2yaml output order doesn't have to be fixed and this will 
help remove the risk of things rotting.

https://github.com/llvm/llvm-project/pull/153246
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [lldb] 82f2353 - Revert "[lldb] Fallback to expression eval when Dump of variable fails in dwi…"

2025-08-15 Thread via llvm-branch-commits


Author: Dave Lee
Date: 2025-08-15T08:46:32-07:00
New Revision: 82f23539351c8365281f6b68a42564c4ec25548e

URL: 
https://github.com/llvm/llvm-project/commit/82f23539351c8365281f6b68a42564c4ec25548e
DIFF: 
https://github.com/llvm/llvm-project/commit/82f23539351c8365281f6b68a42564c4ec25548e.diff

LOG: Revert "[lldb] Fallback to expression eval when Dump of variable fails in 
dwi…"

This reverts commit f23c10f9e68efae7df10745234bf879a84b2d02b.

Added: 


Modified: 
lldb/source/Commands/CommandObjectDWIMPrint.cpp

Removed: 




diff  --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp 
b/lldb/source/Commands/CommandObjectDWIMPrint.cpp
index 5e864a4cc52c2..0d9eb45732161 100644
--- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp
+++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp
@@ -18,14 +18,11 @@
 #include "lldb/Interpreter/OptionGroupValueObjectDisplay.h"
 #include "lldb/Target/StackFrame.h"
 #include "lldb/Utility/ConstString.h"
-#include "lldb/Utility/LLDBLog.h"
-#include "lldb/Utility/Log.h"
 #include "lldb/ValueObject/ValueObject.h"
 #include "lldb/lldb-defines.h"
 #include "lldb/lldb-enumerations.h"
 #include "lldb/lldb-forward.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
 
 #include 
 
@@ -135,22 +132,27 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command,
   };
 
   // Dump `valobj` according to whether `po` was requested or not.
-  auto dump_val_object = [&](ValueObject &valobj) -> Error {
+  auto dump_val_object = [&](ValueObject &valobj) {
 if (is_po) {
   StreamString temp_result_stream;
-  if (Error err = valobj.Dump(temp_result_stream, dump_options))
-return err;
+  if (llvm::Error error = valobj.Dump(temp_result_stream, dump_options)) {
+result.AppendError(toString(std::move(error)));
+return;
+  }
   llvm::StringRef output = temp_result_stream.GetString();
   maybe_add_hint(output);
   result.GetOutputStream() << output;
 } else {
-  if (Error err = valobj.Dump(result.GetOutputStream(), dump_options))
-return err;
+  llvm::Error error =
+valobj.Dump(result.GetOutputStream(), dump_options);
+  if (error) {
+result.AppendError(toString(std::move(error)));
+return;
+  }
 }
 m_interpreter.PrintWarningsIfNecessary(result.GetOutputStream(),
m_cmd_name);
 result.SetStatus(eReturnStatusSuccessFinishResult);
-return Error::success();
   };
 
   // First, try `expr` as a _limited_ frame variable expression path: only the
@@ -184,13 +186,8 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command,
  expr);
   }
 
-  Error err = dump_val_object(*valobj_sp);
-  if (!err)
-return;
-
-  // Dump failed, continue on to expression evaluation.
-  LLDB_LOG_ERROR(GetLog(LLDBLog::Expressions), std::move(err),
- "could not print frame variable '{1}': {0}", expr);
+  dump_val_object(*valobj_sp);
+  return;
 }
   }
 
@@ -199,14 +196,8 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command,
 if (auto *state = target.GetPersistentExpressionStateForLanguage(language))
   if (auto var_sp = state->GetVariable(expr))
 if (auto valobj_sp = var_sp->GetValueObject()) {
-  Error err = dump_val_object(*valobj_sp);
-  if (!err)
-return;
-
-  // Dump failed, continue on to expression evaluation.
-  LLDB_LOG_ERROR(GetLog(LLDBLog::Expressions), std::move(err),
- "could not print persistent variable '{1}': {0}",
- expr);
+  dump_val_object(*valobj_sp);
+  return;
 }
 
   // Third, and lastly, try `expr` as a source expression to evaluate.
@@ -257,12 +248,10 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command,
   result.AppendNoteWithFormatv("ran `expression {0}{1}`", flags, expr);
 }
 
-if (valobj_sp->GetError().GetError() != UserExpression::kNoResult) {
-  if (Error err = dump_val_object(*valobj_sp))
-result.SetError(std::move(err));
-} else {
+if (valobj_sp->GetError().GetError() != UserExpression::kNoResult)
+  dump_val_object(*valobj_sp);
+else
   result.SetStatus(eReturnStatusSuccessFinishNoResult);
-}
 
 if (suppress_result)
   if (auto result_var_sp =



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [clang][PAC][ObjC] Merge the block metadata support for the arm64e abi to llvm 21 (PR #153725)

2025-08-15 Thread Oliver Hunt via llvm-branch-commits

ojhunt wrote:

> Not opposed but this is a pretty significant amount of change for being this 
> late in the rc cycles, and the changes haven't been upstream for very long. 
> How risky are these changes?

This is the upstreaming of code we've had deployed for a few years at this 
point. The only changes are style improvements and the brief period where we 
thought we could drop `LangOpts.PointerAuthBlockDescriptorPointers` from the 
upstreaming, before remembering that that's only ABI on a subset of our 
user-space environments.

So I do recognize it is a much bigger change than would be ideal at this point, 
but it is self-contained, and it only actually applies to one family of 
platforms, which have significant amounts of deployment experience. E.g., even 
if there is an error in it, it only impacts us. Not having it is even worse: 
instead of incorrect edge cases, it would be a complete ABI mismatch. (If there 
were non-edge-case problems, we have a number of tests that would presumably 
detect them.)

https://github.com/llvm/llvm-project/pull/153725
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [flang] [flang][OpenMP] Parse GROUPPRIVATE directive (PR #153807)

2025-08-15 Thread Krzysztof Parzyszek via llvm-branch-commits


https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/153807

>From ccc414db30f65308d47d2efbb3198a896bd5a67e Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Fri, 15 Aug 2025 08:12:45 -0500
Subject: [PATCH 1/3] [flang][OpenMP] Parse GROUPPRIVATE directive

No semantic checks or lowering yet.
---
 flang/include/flang/Parser/dump-parse-tree.h  |  1 +
 flang/include/flang/Parser/parse-tree.h   | 14 +++--
 flang/lib/Lower/OpenMP/OpenMP.cpp |  7 +
 flang/lib/Parser/openmp-parsers.cpp   |  8 +
 flang/lib/Parser/unparse.cpp  |  7 +
 flang/lib/Semantics/check-omp-structure.cpp   | 13 
 flang/lib/Semantics/check-omp-structure.h |  2 ++
 flang/test/Lower/OpenMP/Todo/groupprivate.f90 |  9 ++
 flang/test/Parser/OpenMP/groupprivate.f90 | 30 +++
 9 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/Todo/groupprivate.f90
 create mode 100644 flang/test/Parser/OpenMP/groupprivate.f90

diff --git a/flang/include/flang/Parser/dump-parse-tree.h 
b/flang/include/flang/Parser/dump-parse-tree.h
index 2c666a6d09a7b..8fbc6ccc639bf 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -729,6 +729,7 @@ class ParseTreeDumper {
   NODE(parser, OpenMPLoopConstruct)
   NODE(parser, OpenMPExecutableAllocate)
   NODE(parser, OpenMPAllocatorsConstruct)
+  NODE(parser, OpenMPGroupprivate)
   NODE(parser, OpenMPRequiresConstruct)
   NODE(parser, OpenMPSimpleStandaloneConstruct)
   NODE(parser, OpenMPStandaloneConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h 
b/flang/include/flang/Parser/parse-tree.h
index e72190f019dd1..ae0259fe9025e 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4943,6 +4943,15 @@ struct OpenMPDeclareSimdConstruct {
   std::tuple, OmpClauseList> t;
 };
 
+// ref: [6.0:301-303]
+//
+// groupprivate-directive ->
+//GROUPPRIVATE (variable-list-item...)  // since 6.0
+struct OpenMPGroupprivate {
+  WRAPPER_CLASS_BOILERPLATE(OpenMPGroupprivate, OmpDirectiveSpecification);
+  CharBlock source;
+};
+
 // 2.4 requires -> REQUIRES requires-clause[ [ [,] requires-clause]...]
 struct OpenMPRequiresConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPRequiresConstruct);
@@ -4970,8 +4979,9 @@ struct OpenMPDeclarativeConstruct {
   std::variant
+  OmpDeclareVariantDirective, OpenMPGroupprivate, OpenMPThreadprivate,
+  OpenMPRequiresConstruct, OpenMPUtilityConstruct,
+  OmpMetadirectiveDirective>
   u;
 };
 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fef64ccc15015..ec2ec37e623f8 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3593,6 +3593,13 @@ genOMP(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   }
 }
 
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
+   semantics::SemanticsContext &semaCtx,
+   lower::pft::Evaluation &eval,
+   const parser::OpenMPGroupprivate &directive) {
+  TODO(converter.getCurrentLocation(), "GROUPPRIVATE");
+}
+
 static void genOMP(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
diff --git a/flang/lib/Parser/openmp-parsers.cpp 
b/flang/lib/Parser/openmp-parsers.cpp
index 46b14861096f1..41c16212f5771 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -1773,6 +1773,12 @@ 
TYPE_PARSER(sourced(construct(
 verbatim("DECLARE SIMD"_tok) || verbatim("DECLARE_SIMD"_tok),
 maybe(parenthesized(name)), Parser{})))
 
+TYPE_PARSER(sourced( //
+construct(
+predicated(OmpDirectiveNameParser{},
+IsDirective(llvm::omp::Directive::OMPD_groupprivate)) >=
+Parser{})))
+
 // 2.4 Requires construct
 TYPE_PARSER(sourced(construct(
 verbatim("REQUIRES"_tok), Parser{})))
@@ -1808,6 +1814,8 @@ TYPE_PARSER(
 Parser{}) ||
 construct(
 Parser{}) ||
+construct(
+Parser{}) ||
 construct(
 Parser{}) ||
 construct(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 4f8d498972807..4294a6d491648 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2716,6 +2716,13 @@ class UnparseVisitor {
   void Unparse(const OpenMPDispatchConstruct &x) { //
 Unparse(static_cast(x));
   }
+  void Unparse(const OpenMPGroupprivate &x) {
+BeginOpenMP();
+Word("!$OMP ");
+Walk(x.v);
+Put("\n");
+EndOpenMP();
+  }
   void Unparse(const OpenMPRequiresC

[llvm-branch-commits] [llvm] [mlir] [OpenMP][OMPIRBuilder] Use device shared memory for arg structures (PR #150925)

2025-08-15 Thread Sergio Afonso via llvm-branch-commits


https://github.com/skatrak updated 
https://github.com/llvm/llvm-project/pull/150925

>From 688b61435b38e8632ab81e9aa94fadb5aa5ad7f1 Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Thu, 3 Jul 2025 16:47:51 +0100
Subject: [PATCH 1/4] [OpenMP][OMPIRBuilder] Use device shared memory for arg
 structures

Argument structures are created when sections of the LLVM IR corresponding to
an OpenMP construct are outlined into their own function. For this, stack
allocations are used.

This patch modifies this behavior when compiling for a target device and
outlining `parallel`-related IR, so that it uses device shared memory instead
of private stack space. This is needed in order for threads to have access to
these arguments.
---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h   |  6 ++
 .../llvm/Transforms/Utils/CodeExtractor.h | 34 ++-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 98 +--
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   | 73 ++
 .../LLVMIR/omptarget-parallel-llvm.mlir   | 10 +-
 5 files changed, 187 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 0fb664aa5f888..90740e0f4fad0 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2244,7 +2244,13 @@ class OpenMPIRBuilder {
   /// during finalization.
   struct OutlineInfo {
 using PostOutlineCBTy = std::function;
+using CustomArgAllocatorCBTy = std::function;
+using CustomArgDeallocatorCBTy = std::function;
 PostOutlineCBTy PostOutlineCB;
+CustomArgAllocatorCBTy CustomArgAllocatorCB;
+CustomArgDeallocatorCBTy CustomArgDeallocatorCB;
 BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
 SmallVector ExcludeArgsFromAggregate;
 
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h 
b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 407eb50d2c7a3..d72f697cda992 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/IR/BasicBlock.h"
 #include "llvm/Support/Compiler.h"
 #include 
 
@@ -24,7 +25,6 @@ namespace llvm {
 
 template  class SmallPtrSetImpl;
 class AllocaInst;
-class BasicBlock;
 class BlockFrequency;
 class BlockFrequencyInfo;
 class BranchProbabilityInfo;
@@ -85,6 +85,10 @@ class CodeExtractorAnalysisCache {
   /// 3) Add allocas for any scalar outputs, adding all of the outputs' allocas
   ///as arguments, and inserting stores to the arguments for any scalars.
   class CodeExtractor {
+using CustomArgAllocatorCBTy = std::function;
+using CustomArgDeallocatorCBTy = std::function;
 using ValueSet = SetVector;
 
 // Various bits of state computed on construction.
@@ -133,6 +137,25 @@ class CodeExtractorAnalysisCache {
 // space.
 bool ArgsInZeroAddressSpace;
 
+// If set, this callback will be used to allocate the arguments in the
+// caller before passing it to the outlined function holding the extracted
+// piece of code.
+CustomArgAllocatorCBTy *CustomArgAllocatorCB;
+
+// A block outside of the extraction set where previously introduced
+// intermediate allocations can be deallocated. This is only used when an
+// custom deallocator is specified.
+BasicBlock *DeallocationBlock;
+
+// If set, this callback will be used to deallocate the arguments in the
+// caller after running the outlined function holding the extracted piece 
of
+// code. It will not be called if a custom allocator isn't also present.
+//
+// By default, this will be done at the end of the basic block containing
+// the call to the outlined function, except if a deallocation block is
+// specified. In that case, that will take precedence.
+CustomArgDeallocatorCBTy *CustomArgDeallocatorCB;
+
   public:
 /// Create a code extractor for a sequence of blocks.
 ///
@@ -149,7 +172,9 @@ class CodeExtractorAnalysisCache {
 /// the function from which the code is being extracted.
 /// If ArgsInZeroAddressSpace param is set to true, then the aggregate
 /// param pointer of the outlined function is declared in zero address
-/// space.
+/// space. If a CustomArgAllocatorCB callback is specified, it will be used
+/// to allocate any structures or variable copies needed to pass arguments
+/// to the outlined function, rather than using regular allocas.
 LLVM_ABI
 CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr,
   bool AggregateArgs = false, BlockFrequencyInfo *BFI = 
nullptr,
@@ -157,7 +182,10 @@ class CodeExtractorAnalysisCache {
   AssumptionCache *AC = nullptr, bool AllowVarArgs = false,
   bool AllowAlloca = false,
   BasicBlock *Allocat

[llvm-branch-commits] [llvm] [mlir] [OpenMP][OMPIRBuilder] Support parallel in Generic kernels (PR #150926)

2025-08-15 Thread Sergio Afonso via llvm-branch-commits


https://github.com/skatrak updated 
https://github.com/llvm/llvm-project/pull/150926

>From bd815722923fad252e5f41910343ef03f6f92883 Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Fri, 4 Jul 2025 16:32:03 +0100
Subject: [PATCH 1/2] [OpenMP][OMPIRBuilder] Support parallel in Generic
 kernels

This patch introduces codegen logic to produce a wrapper function argument for
the `__kmpc_parallel_51` DeviceRTL function needed to handle arguments passed
using device shared memory in Generic mode.
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 100 --
 .../LLVMIR/omptarget-parallel-llvm.mlir   |  25 -
 2 files changed, 116 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 83cb21b54394b..33a9c8d114cb6 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1406,6 +1406,86 @@ Error OpenMPIRBuilder::emitCancelationCheckImpl(
   return Error::success();
 }
 
+// Create wrapper function used to gather the outlined function's argument
+// structure from a shared buffer and to forward them to it when running in
+// Generic mode.
+//
+// The outlined function is expected to receive 2 integer arguments followed by
+// an optional pointer argument to an argument structure holding the rest.
+static Function *createTargetParallelWrapper(OpenMPIRBuilder *OMPIRBuilder,
+ Function &OutlinedFn) {
+  size_t NumArgs = OutlinedFn.arg_size();
+  assert((NumArgs == 2 || NumArgs == 3) &&
+ "expected a 2-3 argument parallel outlined function");
+  bool UseArgStruct = NumArgs == 3;
+
+  IRBuilder<> &Builder = OMPIRBuilder->Builder;
+  IRBuilder<>::InsertPointGuard IPG(Builder);
+  auto *FnTy = FunctionType::get(Builder.getVoidTy(),
+ {Builder.getInt16Ty(), Builder.getInt32Ty()},
+ /*isVarArg=*/false);
+  auto *WrapperFn =
+  Function::Create(FnTy, GlobalValue::InternalLinkage,
+   OutlinedFn.getName() + ".wrapper", OMPIRBuilder->M);
+
+  WrapperFn->addParamAttr(0, Attribute::NoUndef);
+  WrapperFn->addParamAttr(0, Attribute::ZExt);
+  WrapperFn->addParamAttr(1, Attribute::NoUndef);
+
+  BasicBlock *EntryBB =
+  BasicBlock::Create(OMPIRBuilder->M.getContext(), "entry", WrapperFn);
+  Builder.SetInsertPoint(EntryBB);
+
+  // Allocation.
+  Value *AddrAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
+   /*ArraySize=*/nullptr, "addr");
+  AddrAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
+  AddrAlloca, Builder.getPtrTy(/*AddrSpace=*/0),
+  AddrAlloca->getName() + ".ascast");
+
+  Value *ZeroAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
+   /*ArraySize=*/nullptr, "zero");
+  ZeroAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
+  ZeroAlloca, Builder.getPtrTy(/*AddrSpace=*/0),
+  ZeroAlloca->getName() + ".ascast");
+
+  Value *ArgsAlloca = nullptr;
+  if (UseArgStruct) {
+ArgsAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
+  /*ArraySize=*/nullptr, "global_args");
+ArgsAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ArgsAlloca, Builder.getPtrTy(/*AddrSpace=*/0),
+ArgsAlloca->getName() + ".ascast");
+  }
+
+  // Initialization.
+  Builder.CreateStore(WrapperFn->getArg(1), AddrAlloca);
+  Builder.CreateStore(Builder.getInt32(0), ZeroAlloca);
+  if (UseArgStruct) {
+Builder.CreateCall(
+OMPIRBuilder->getOrCreateRuntimeFunctionPtr(
+llvm::omp::RuntimeFunction::OMPRTL___kmpc_get_shared_variables),
+{ArgsAlloca});
+  }
+
+  SmallVector Args{AddrAlloca, ZeroAlloca};
+
+  // Load structArg from global_args.
+  if (UseArgStruct) {
+Value *StructArg = Builder.CreateLoad(Builder.getPtrTy(), ArgsAlloca);
+StructArg = Builder.CreateInBoundsGEP(Builder.getPtrTy(), StructArg,
+  {Builder.getInt64(0)});
+StructArg = Builder.CreateLoad(Builder.getPtrTy(), StructArg, "structArg");
+Args.push_back(StructArg);
+  }
+
+  // Call the outlined function holding the parallel body.
+  Builder.CreateCall(&OutlinedFn, Args);
+  Builder.CreateRetVoid();
+
+  return WrapperFn;
+}
+
 // Callback used to create OpenMP runtime calls to support
 // omp parallel clause for the device.
 // We need to use this callback to replace call to the OutlinedFn in OuterFn
@@ -1415,6 +1495,10 @@ static void targetParallelCallback(
 BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition,
 Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr,
 Value *ThreadID, const SmallVector &ToBeDeleted) {
+  assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+  unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
   //

[llvm-branch-commits] [llvm] [OpenMPOpt] Make parallel regions reachable from new DeviceRTL loop functions (PR #150927)

2025-08-15 Thread Sergio Afonso via llvm-branch-commits


https://github.com/skatrak updated 
https://github.com/llvm/llvm-project/pull/150927

>From 38a38bb056951bf50c5af7f7562bcbb834259a39 Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Fri, 25 Jul 2025 13:52:11 +0100
Subject: [PATCH 1/2] [OpenMPOpt] Make parallel regions reachable from new
 DeviceRTL loop functions

This patch updates the OpenMP optimization pass to know about the new DeviceRTL
functions for loop constructs.

This change marks these functions as potentially containing parallel regions,
which fixes a current bug with the state machine rewrite optimization. It
previously failed to identify parallel regions located inside of the callbacks
passed to these new DeviceRTL functions, causing the resulting code to skip
executing these parallel regions.

As a result, Generic kernels produced by Flang that contain parallel regions
now work properly.

One known related issue not fixed by this patch is that the presence of calls
to these functions will prevent the SPMD-ization of Generic kernels by
OpenMPOpt. Previously, this was due to assuming there was no parallel region.
This is changed by this patch, but instead we now mark it temporarily as
unsupported in an SPMD context. The reason is that, without additional changes,
code intended for the main thread of the team located outside of the parallel
region would not be guarded properly, resulting in race conditions and
generally invalid behavior.
---
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp |  22 +++
 .../fortran/target-generic-loops.f90  | 130 ++
 .../offloading/fortran/target-spmd-loops.f90  |  39 ++
 3 files changed, 191 insertions(+)
 create mode 100644 offload/test/offloading/fortran/target-generic-loops.f90
 create mode 100644 offload/test/offloading/fortran/target-spmd-loops.f90

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp 
b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 5e2247f2a88d0..d58da7b1db0e3 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -5020,6 +5020,28 @@ struct AAKernelInfoCallSite : AAKernelInfo {
   case OMPRTL___kmpc_free_shared:
 // Return without setting a fixpoint, to be resolved in updateImpl.
 return;
+  case OMPRTL___kmpc_distribute_static_loop_4:
+  case OMPRTL___kmpc_distribute_static_loop_4u:
+  case OMPRTL___kmpc_distribute_static_loop_8:
+  case OMPRTL___kmpc_distribute_static_loop_8u:
+  case OMPRTL___kmpc_distribute_for_static_loop_4:
+  case OMPRTL___kmpc_distribute_for_static_loop_4u:
+  case OMPRTL___kmpc_distribute_for_static_loop_8:
+  case OMPRTL___kmpc_distribute_for_static_loop_8u:
+  case OMPRTL___kmpc_for_static_loop_4:
+  case OMPRTL___kmpc_for_static_loop_4u:
+  case OMPRTL___kmpc_for_static_loop_8:
+  case OMPRTL___kmpc_for_static_loop_8u:
+// Parallel regions might be reached by these calls, as they take a
+// callback argument potentially containing arbitrary user-provided code.
+ReachedUnknownParallelRegions.insert(&CB);
+// TODO: The presence of these calls on their own does not prevent a
+// kernel from being SPMD-izable. We mark it as such because we need
+// further changes in order to also consider the contents of the
+// callbacks passed to them.
+SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+SPMDCompatibilityTracker.insert(&CB);
+break;
   default:
 // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
 // generally. However, they do not hide parallel regions.
diff --git a/offload/test/offloading/fortran/target-generic-loops.f90 
b/offload/test/offloading/fortran/target-generic-loops.f90
new file mode 100644
index 0000000000000..07bcbfd2c8752
--- /dev/null
+++ b/offload/test/offloading/fortran/target-generic-loops.f90
@@ -0,0 +1,130 @@
+! Offloading test for generic target regions containing different kinds of
+! loop constructs inside.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+  integer :: i1, i2, n1, n2, counter
+
+  n1 = 100
+  n2 = 50
+
+  counter = 0
+  !$omp target map(tofrom:counter)
+!$omp teams distribute reduction(+:counter)
+do i1=1, n1
+  counter = counter + 1
+end do
+  !$omp end target
+
+  ! CHECK: 1 100
+  print '(I2" "I0)', 1, counter
+
+  counter = 0
+  !$omp target map(tofrom:counter)
+!$omp parallel do reduction(+:counter)
+do i1=1, n1
+  counter = counter + 1
+end do
+!$omp parallel do reduction(+:counter)
+do i1=1, n1
+  counter = counter + 1
+end do
+  !$omp end target
+
+  ! CHECK: 2 200
+  print '(I2" "I0)', 2, counter
+
+  counter = 0
+  !$omp target map(tofrom:counter)
+counter = counter + 1
+!$omp parallel do reduction(+:counter)
+do i1=1, n1
+  counter = counter + 1
+end do
+counter = counter + 1
+!$omp parallel do reduction(+:counter)
+do i1=1, n1
+  counte

[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)

2025-08-15 Thread via llvm-branch-commits


https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/153926
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-mc

Author: None (llvmbot)


Changes

Backport 76d993bd25ff462d915f69772454e7b1ca42fdb8

Requested by: @androm3da

---
Full diff: https://github.com/llvm/llvm-project/pull/153926.diff


2 Files Affected:

- (modified) llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp (+3) 
- (modified) llvm/test/MC/Hexagon/system-inst.s (+3) 


``diff
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp 
b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 22cff7c80fa01..bcddb540d35dc 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -526,6 +526,9 @@ DecodeStatus 
HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
 MI.insert(MI.begin() + 1,
   MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
 break;
+  case Hexagon::Y4_crswap10:
+MI.addOperand(MCOperand::createReg(Hexagon::SGP1_0));
+break;
   default:
 break;
   }
diff --git a/llvm/test/MC/Hexagon/system-inst.s 
b/llvm/test/MC/Hexagon/system-inst.s
index 7bc1533598532..07f7ca0acb2dc 100644
--- a/llvm/test/MC/Hexagon/system-inst.s
+++ b/llvm/test/MC/Hexagon/system-inst.s
@@ -89,6 +89,9 @@ crswap(r12,sgp0)
 #CHECK: 652dc000 { crswap(r13,sgp1) }
 crswap(r13,sgp1)
 
+#CHECK: 6d8ec000 { crswap(r15:14,s1:0) }
+crswap(r15:14,sgp1:0)
+
 #CHECK: 660fc00e { r14 = getimask(r15) }
 r14=getimask(r15)
 

``




https://github.com/llvm/llvm-project/pull/153926
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-hexagon

Author: None (llvmbot)


Changes

Backport 76d993bd25ff462d915f69772454e7b1ca42fdb8

Requested by: @androm3da

---
Full diff: https://github.com/llvm/llvm-project/pull/153926.diff


2 Files Affected:

- (modified) llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp (+3) 
- (modified) llvm/test/MC/Hexagon/system-inst.s (+3) 


``diff
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp 
b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 22cff7c80fa01..bcddb540d35dc 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -526,6 +526,9 @@ DecodeStatus 
HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
 MI.insert(MI.begin() + 1,
   MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
 break;
+  case Hexagon::Y4_crswap10:
+MI.addOperand(MCOperand::createReg(Hexagon::SGP1_0));
+break;
   default:
 break;
   }
diff --git a/llvm/test/MC/Hexagon/system-inst.s 
b/llvm/test/MC/Hexagon/system-inst.s
index 7bc1533598532..07f7ca0acb2dc 100644
--- a/llvm/test/MC/Hexagon/system-inst.s
+++ b/llvm/test/MC/Hexagon/system-inst.s
@@ -89,6 +89,9 @@ crswap(r12,sgp0)
 #CHECK: 652dc000 { crswap(r13,sgp1) }
 crswap(r13,sgp1)
 
+#CHECK: 6d8ec000 { crswap(r15:14,s1:0) }
+crswap(r15:14,sgp1:0)
+
 #CHECK: 660fc00e { r14 = getimask(r15) }
 r14=getimask(r15)
 

``




https://github.com/llvm/llvm-project/pull/153926
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)

2025-08-15 Thread via llvm-branch-commits


llvmbot wrote:

@quic-akaryaki What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/153926
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)

2025-08-15 Thread via llvm-branch-commits


https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/153926

Backport 76d993bd25ff462d915f69772454e7b1ca42fdb8

Requested by: @androm3da

>From 27a00648607f22b4b2d1de4adb72fe6364a7ef88 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov 
Date: Sat, 16 Aug 2025 05:13:43 +0300
Subject: [PATCH] [Hexagon] Add missing operand when disassembling Y4_crswap10
 (#153849)

Auto-generated decoder fails to add the $sgp10 operand because it has no
encoding bits.
Work around this by adding the missing operand after decoding is
complete.

Fixes #153829.

(cherry picked from commit 76d993bd25ff462d915f69772454e7b1ca42fdb8)
---
 llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 3 +++
 llvm/test/MC/Hexagon/system-inst.s   | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp 
b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 22cff7c80fa01..bcddb540d35dc 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -526,6 +526,9 @@ DecodeStatus 
HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
 MI.insert(MI.begin() + 1,
   MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
 break;
+  case Hexagon::Y4_crswap10:
+MI.addOperand(MCOperand::createReg(Hexagon::SGP1_0));
+break;
   default:
 break;
   }
diff --git a/llvm/test/MC/Hexagon/system-inst.s 
b/llvm/test/MC/Hexagon/system-inst.s
index 7bc1533598532..07f7ca0acb2dc 100644
--- a/llvm/test/MC/Hexagon/system-inst.s
+++ b/llvm/test/MC/Hexagon/system-inst.s
@@ -89,6 +89,9 @@ crswap(r12,sgp0)
 #CHECK: 652dc000 { crswap(r13,sgp1) }
 crswap(r13,sgp1)
 
+#CHECK: 6d8ec000 { crswap(r15:14,s1:0) }
+crswap(r15:14,sgp1:0)
+
 #CHECK: 660fc00e { r14 = getimask(r15) }
 r14=getimask(r15)
 

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

95 matches

Mail list logo