[llvm-branch-commits] [llvm] [LoongArch] Reduce number of reserved relocations when relax enabled (PR #153769)
https://github.com/zhaoqi5 updated
https://github.com/llvm/llvm-project/pull/153769
>From f67324528d93ca3e908f39e8e89caef5ecc3e11f Mon Sep 17 00:00:00 2001
From: Qi Zhao
Date: Fri, 15 Aug 2025 17:12:33 +0800
Subject: [PATCH 1/2] [LoongArch] Reduce number of reserved relocations when
relax enabled
---
.../MCTargetDesc/LoongArchAsmBackend.cpp | 27 +++
.../MC/LoongArch/Relocations/relax-attr.s | 7 ++---
2 files changed, 13 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index ca5d27d54bb81..c6eda4fd63a6b 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -143,8 +143,6 @@ static void fixupLeb128(MCContext &Ctx, const MCFixup
&Fixup, uint8_t *Data,
void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
const MCValue &Target, uint8_t *Data,
uint64_t Value, bool IsResolved) {
- if (IsResolved && shouldForceRelocation(Fixup, Target))
-IsResolved = false;
IsResolved = addReloc(F, Fixup, Target, Value, IsResolved);
if (!Value)
return; // Doesn't change encoding.
@@ -176,20 +174,6 @@ void LoongArchAsmBackend::applyFixup(const MCFragment &F,
const MCFixup &Fixup,
}
}
-bool LoongArchAsmBackend::shouldForceRelocation(const MCFixup &Fixup,
-const MCValue &Target) {
- switch (Fixup.getKind()) {
- default:
-return STI.hasFeature(LoongArch::FeatureRelax);
- case FK_Data_1:
- case FK_Data_2:
- case FK_Data_4:
- case FK_Data_8:
- case FK_Data_leb128:
-return !Target.isAbsolute();
- }
-}
-
static inline std::pair
getRelocPairForSize(unsigned Size) {
switch (Size) {
@@ -484,9 +468,16 @@ bool LoongArchAsmBackend::addReloc(const MCFragment &F,
const MCFixup &Fixup,
return false;
}
- IsResolved = Fallback();
// If linker relaxation is enabled and supported by the current relocation,
- // append a RELAX relocation.
+ // generate a relocation and then append a RELAX.
+ if (Fixup.isLinkerRelaxable())
+IsResolved = false;
+ if (IsResolved && Fixup.isPCRel())
+IsResolved = isPCRelFixupResolved(Target.getAddSym(), F);
+
+ if (!IsResolved)
+Asm->getWriter().recordRelocation(F, Fixup, Target, FixedValue);
+
if (Fixup.isLinkerRelaxable()) {
auto FA = MCFixup::create(Fixup.getOffset(), nullptr, ELF::R_LARCH_RELAX);
Asm->getWriter().recordRelocation(F, FA, MCValue::get(nullptr),
diff --git a/llvm/test/MC/LoongArch/Relocations/relax-attr.s
b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
index e3e9038b755d3..d94d32ebd7ab0 100644
--- a/llvm/test/MC/LoongArch/Relocations/relax-attr.s
+++ b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
@@ -17,11 +17,8 @@
# CHECKR-NEXT: 0x4 R_LARCH_CALL36 foo 0x0
# CHECKR-NEXT: 0x4 R_LARCH_RELAX - 0x0
# CHECKR-NEXT: 0x10 R_LARCH_B21 .L0 0x0
-# CHECKR-NEXT: 0x14 R_LARCH_B21 .L1 0x0
# CHECKR-NEXT: 0x18 R_LARCH_B16 .L0 0x0
-# CHECKR-NEXT: 0x1C R_LARCH_B16 .L1 0x0
# CHECKR-NEXT: 0x20 R_LARCH_B26 .L0 0x0
-# CHECKR-NEXT: 0x24 R_LARCH_B26 .L1 0x0
# CHECKR-NEXT: }
# CHECKR-NEXT: Section ({{.*}}) .rela.data {
# CHECKR-NEXT: 0x0 R_LARCH_64 .L1 0x0
@@ -36,6 +33,10 @@
.L1:
nop
+## Relocations for branches to .L0 must be reserved and be fixed up by linker
+## when linker relaxation enabled, because of the relaxable call36 instruction.
+## Branches to .L1 can be resolved correctly at compile time, so their
+## relocations can simply be removed.
bnez $a0, .L0
beqz $a0, .L1
beq $a0, $a1, .L0
>From f491f2cf66ea530ef3a5f465b87a0dad0b4c6d5e Mon Sep 17 00:00:00 2001
From: Qi Zhao
Date: Fri, 15 Aug 2025 20:13:59 +0800
Subject: [PATCH 2/2] remove shouldForceRelocation declaration
---
llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
index 1f1360119edba..f79d3aa48c54c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -44,8 +44,6 @@ class LoongArchAsmBackend : public MCAsmBackend {
void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
uint8_t *Data, uint64_t Value, bool IsResolved) override;
- bool shouldForceRelocation(const MCFixup &Fixup, const MCValue &Target);
-
std::optional getFixupKind(StringRef Name) const override;
MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)
llvmbot wrote:
@llvm/pr-subscribers-llvm-binary-utilities
Author: Matt Arsenault (arsenm)
Changes
We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.
BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.
As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.
---
Full diff: https://github.com/llvm/llvm-project/pull/153801.diff
1 Files Affected:
- (modified) llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp (+32-27)
```diff
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index 1c5f38d0c24b8..05f2512e24a50 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -289,7 +289,6 @@ class RuntimeLibcallEmitter {
/// Helper struct for the name hash table.
struct LookupEntry {
- StringRef FuncName;
uint64_t Hash = 0;
unsigned TableValue = 0;
};
@@ -339,14 +338,17 @@ static void emitHashFunction(raw_ostream &OS) {
/// Return the table size, maximum number of collisions for the set of hashes
static std::pair
computePerfectHashParameters(ArrayRef Hashes) {
- const int SizeOverhead = 10;
- const int NumHashes = Hashes.size();
+ // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+ const int SizeOverhead = 4;
// Index derived from hash -> number of collisions.
DenseMap Table;
+ unsigned NumHashes = Hashes.size();
+
for (int MaxCollisions = 1;; ++MaxCollisions) {
-for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead *
NumHashes;
+ N <<= 1) {
Table.clear();
bool NeedResize = false;
@@ -367,22 +369,12 @@ computePerfectHashParameters(ArrayRef Hashes) {
static std::vector
constructPerfectHashTable(ArrayRef Keywords,
- ArrayRef Hashes, int Size, int Collisions,
- StringToOffsetTable &OffsetTable) {
- DenseSet Seen;
+ ArrayRef Hashes,
+ ArrayRef TableValues, int Size,
+ int Collisions, StringToOffsetTable &OffsetTable) {
std::vector Lookup(Size * Collisions);
- for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-// We do not want to add repeated entries for cases with the same name,
only
-// an entry for the first, with the name collision enum values immediately
-// following.
-if (!Seen.insert(ImplName).second)
- continue;
-
-uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
-
+ for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
uint64_t Idx = (HashValue % static_cast(Size)) *
static_cast(Collisions);
@@ -390,8 +382,7 @@ constructPerfectHashTable(ArrayRef
Keywords,
for (int J = 0; J < Collisions; ++J) {
LookupEntry &Entry = Lookup[Idx + J];
if (Entry.TableValue == 0) {
-Entry.FuncName = ImplName;
-Entry.TableValue = LibCallImpl.getEnumVal();
+Entry.TableValue = TableValue;
Entry.Hash = HashValue;
Found = true;
break;
@@ -399,7 +390,7 @@ constructPerfectHashTable(ArrayRef
Keywords,
}
if (!Found)
- reportFatalInternalError("failure to hash " + ImplName);
+ reportFatalInternalError("failure to hash");
}
return Lookup;
@@ -409,15 +400,25 @@ constructPerfectHashTable(ArrayRef
Keywords,
void RuntimeLibcallEmitter::emitNameMatchHashTable(
raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
std::vector Hashes(RuntimeLibcallImplDefList.size());
+ std::vector TableValues(RuntimeLibcallImplDefList.size());
+ DenseSet SeenFuncNames;
size_t MaxFuncNameSize = 0;
size_t Index = 0;
+
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
StringRef ImplName = LibCallImpl.getLibcallFuncName();
-MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
-Hashes[Index++] = hash(ImplName);
+if (SeenFuncNames.insert(ImplName).second) {
+ MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
+ TableValues[Index] = LibCallImpl.getEnumVal();
+ Hashes[Index++] = hash(ImplName);
+}
}
+ // Trim excess elements from non-unique entries.
+ Hashes.resize(SeenFuncNames.size());
+ TableValues.resize(SeenFuncNames.size());
+
LLVM_DEBUG({
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
StringRef ImplName = LibCallImpl.getLibcallFuncName();
@@ -447,8 +448,9 @
[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)
llvmbot wrote:
@llvm/pr-subscribers-tablegen
Author: Matt Arsenault (arsenm)
Changes
We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.
BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.
As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.
---
Full diff: https://github.com/llvm/llvm-project/pull/153801.diff
1 Files Affected:
- (modified) llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp (+32-27)
```diff
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index 1c5f38d0c24b8..05f2512e24a50 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -289,7 +289,6 @@ class RuntimeLibcallEmitter {
/// Helper struct for the name hash table.
struct LookupEntry {
- StringRef FuncName;
uint64_t Hash = 0;
unsigned TableValue = 0;
};
@@ -339,14 +338,17 @@ static void emitHashFunction(raw_ostream &OS) {
/// Return the table size, maximum number of collisions for the set of hashes
static std::pair
computePerfectHashParameters(ArrayRef Hashes) {
- const int SizeOverhead = 10;
- const int NumHashes = Hashes.size();
+ // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+ const int SizeOverhead = 4;
// Index derived from hash -> number of collisions.
DenseMap Table;
+ unsigned NumHashes = Hashes.size();
+
for (int MaxCollisions = 1;; ++MaxCollisions) {
-for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead *
NumHashes;
+ N <<= 1) {
Table.clear();
bool NeedResize = false;
@@ -367,22 +369,12 @@ computePerfectHashParameters(ArrayRef Hashes) {
static std::vector
constructPerfectHashTable(ArrayRef Keywords,
- ArrayRef Hashes, int Size, int Collisions,
- StringToOffsetTable &OffsetTable) {
- DenseSet Seen;
+ ArrayRef Hashes,
+ ArrayRef TableValues, int Size,
+ int Collisions, StringToOffsetTable &OffsetTable) {
std::vector Lookup(Size * Collisions);
- for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-// We do not want to add repeated entries for cases with the same name,
only
-// an entry for the first, with the name collision enum values immediately
-// following.
-if (!Seen.insert(ImplName).second)
- continue;
-
-uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
-
+ for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
uint64_t Idx = (HashValue % static_cast(Size)) *
static_cast(Collisions);
@@ -390,8 +382,7 @@ constructPerfectHashTable(ArrayRef
Keywords,
for (int J = 0; J < Collisions; ++J) {
LookupEntry &Entry = Lookup[Idx + J];
if (Entry.TableValue == 0) {
-Entry.FuncName = ImplName;
-Entry.TableValue = LibCallImpl.getEnumVal();
+Entry.TableValue = TableValue;
Entry.Hash = HashValue;
Found = true;
break;
@@ -399,7 +390,7 @@ constructPerfectHashTable(ArrayRef
Keywords,
}
if (!Found)
- reportFatalInternalError("failure to hash " + ImplName);
+ reportFatalInternalError("failure to hash");
}
return Lookup;
@@ -409,15 +400,25 @@ constructPerfectHashTable(ArrayRef
Keywords,
void RuntimeLibcallEmitter::emitNameMatchHashTable(
raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
std::vector Hashes(RuntimeLibcallImplDefList.size());
+ std::vector TableValues(RuntimeLibcallImplDefList.size());
+ DenseSet SeenFuncNames;
size_t MaxFuncNameSize = 0;
size_t Index = 0;
+
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
StringRef ImplName = LibCallImpl.getLibcallFuncName();
-MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
-Hashes[Index++] = hash(ImplName);
+if (SeenFuncNames.insert(ImplName).second) {
+ MaxFuncNameSize = std::max(MaxFuncNameSize, ImplName.size());
+ TableValues[Index] = LibCallImpl.getEnumVal();
+ Hashes[Index++] = hash(ImplName);
+}
}
+ // Trim excess elements from non-unique entries.
+ Hashes.resize(SeenFuncNames.size());
+ TableValues.resize(SeenFuncNames.size());
+
LLVM_DEBUG({
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
StringRef ImplName = LibCallImpl.getLibcallFuncName();
@@ -447,8 +448,9 @@ void Runtim
[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)
https://github.com/arsenm ready_for_review
https://github.com/llvm/llvm-project/pull/153801
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)
https://github.com/arsenm created
https://github.com/llvm/llvm-project/pull/153801
We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.
BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.
As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.
>From f445a169e99a9e603eb285ddf9bdd56df0719d2d Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 15 Aug 2025 09:45:46 +0900
Subject: [PATCH] RuntimeLibcalls: Fix building hash table with duplicate
entries
We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.
BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.
As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.
---
.../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 59 ++-
1 file changed, 32 insertions(+), 27 deletions(-)
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index 1c5f38d0c24b8..05f2512e24a50 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -289,7 +289,6 @@ class RuntimeLibcallEmitter {
/// Helper struct for the name hash table.
struct LookupEntry {
- StringRef FuncName;
uint64_t Hash = 0;
unsigned TableValue = 0;
};
@@ -339,14 +338,17 @@ static void emitHashFunction(raw_ostream &OS) {
/// Return the table size, maximum number of collisions for the set of hashes
static std::pair
computePerfectHashParameters(ArrayRef Hashes) {
- const int SizeOverhead = 10;
- const int NumHashes = Hashes.size();
+ // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+ const int SizeOverhead = 4;
// Index derived from hash -> number of collisions.
DenseMap Table;
+ unsigned NumHashes = Hashes.size();
+
for (int MaxCollisions = 1;; ++MaxCollisions) {
-for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead *
NumHashes;
+ N <<= 1) {
Table.clear();
bool NeedResize = false;
@@ -367,22 +369,12 @@ computePerfectHashParameters(ArrayRef Hashes) {
static std::vector
constructPerfectHashTable(ArrayRef Keywords,
- ArrayRef Hashes, int Size, int Collisions,
- StringToOffsetTable &OffsetTable) {
- DenseSet Seen;
+ ArrayRef Hashes,
+ ArrayRef TableValues, int Size,
+ int Collisions, StringToOffsetTable &OffsetTable) {
std::vector Lookup(Size * Collisions);
- for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-// We do not want to add repeated entries for cases with the same name,
only
-// an entry for the first, with the name collision enum values immediately
-// following.
-if (!Seen.insert(ImplName).second)
- continue;
-
-uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
-
+ for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
uint64_t Idx = (HashValue % static_cast(Size)) *
static_cast(Collisions);
@@ -390,8 +382,7 @@ constructPerfectHashTable(ArrayRef
Keywords,
for (int J = 0; J < Collisions; ++J) {
LookupEntry &Entry = Lookup[Idx + J];
if (Entry.TableValue == 0) {
-Entry.FuncName = ImplName;
-Entry.TableValue = LibCallImpl.getEnumVal();
+Entry.TableValue = TableValue;
Entry.Hash = HashValue;
Found = true;
break;
@@ -399,7 +390,7 @@ constructPerfectHashTable(ArrayRef
Keywords,
}
if (!Found)
- reportFatalInternalError("failure to hash " + ImplName);
+ reportFatalInternalError("failure to hash");
}
return Lookup;
@@ -409,15 +400,25 @@ constructPerfectHashTable(ArrayRef
Keywords,
void RuntimeLibcallEmitter::emitNameMatchHashTable(
raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
std::vector Hashes(RuntimeLibcallImplDefList.size());
+ std::vector TableValues(RuntimeLibcallImplDefList.size());
+ DenseSet SeenFuncNames;
size_t MaxFuncNameSize = 0;
size_t Index = 0;
+
for (const RuntimeLibcallImpl &LibCallImpl : RuntimeLibcallImplDefList) {
StringRef ImplName = LibCallImpl.getL
[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)
arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/153801?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#153801** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/153801?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/153801?utm_source=stack-comment-view-in-graphite)
* **#153210** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/153210?utm_source=stack-comment-icon)
* **#153209** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/153209?utm_source=stack-comment-icon)
* **#150192** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/150192?utm_source=stack-comment-icon)
* **#149836** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/149836?utm_source=stack-comment-icon)
* `main`

This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment).

https://github.com/llvm/llvm-project/pull/153801
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Parse GROUPPRIVATE directive (PR #153807)
https://github.com/kparzysz created
https://github.com/llvm/llvm-project/pull/153807
No semantic checks or lowering yet.
>From ccc414db30f65308d47d2efbb3198a896bd5a67e Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Fri, 15 Aug 2025 08:12:45 -0500
Subject: [PATCH] [flang][OpenMP] Parse GROUPPRIVATE directive
No semantic checks or lowering yet.
---
flang/include/flang/Parser/dump-parse-tree.h | 1 +
flang/include/flang/Parser/parse-tree.h | 14 +++--
flang/lib/Lower/OpenMP/OpenMP.cpp | 7 +
flang/lib/Parser/openmp-parsers.cpp | 8 +
flang/lib/Parser/unparse.cpp | 7 +
flang/lib/Semantics/check-omp-structure.cpp | 13
flang/lib/Semantics/check-omp-structure.h | 2 ++
flang/test/Lower/OpenMP/Todo/groupprivate.f90 | 9 ++
flang/test/Parser/OpenMP/groupprivate.f90 | 30 +++
9 files changed, 89 insertions(+), 2 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/Todo/groupprivate.f90
create mode 100644 flang/test/Parser/OpenMP/groupprivate.f90
diff --git a/flang/include/flang/Parser/dump-parse-tree.h
b/flang/include/flang/Parser/dump-parse-tree.h
index 2c666a6d09a7b..8fbc6ccc639bf 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -729,6 +729,7 @@ class ParseTreeDumper {
NODE(parser, OpenMPLoopConstruct)
NODE(parser, OpenMPExecutableAllocate)
NODE(parser, OpenMPAllocatorsConstruct)
+ NODE(parser, OpenMPGroupprivate)
NODE(parser, OpenMPRequiresConstruct)
NODE(parser, OpenMPSimpleStandaloneConstruct)
NODE(parser, OpenMPStandaloneConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h
b/flang/include/flang/Parser/parse-tree.h
index e72190f019dd1..ae0259fe9025e 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4943,6 +4943,15 @@ struct OpenMPDeclareSimdConstruct {
std::tuple, OmpClauseList> t;
};
+// ref: [6.0:301-303]
+//
+// groupprivate-directive ->
+//GROUPPRIVATE (variable-list-item...) // since 6.0
+struct OpenMPGroupprivate {
+ WRAPPER_CLASS_BOILERPLATE(OpenMPGroupprivate, OmpDirectiveSpecification);
+ CharBlock source;
+};
+
// 2.4 requires -> REQUIRES requires-clause[ [ [,] requires-clause]...]
struct OpenMPRequiresConstruct {
TUPLE_CLASS_BOILERPLATE(OpenMPRequiresConstruct);
@@ -4970,8 +4979,9 @@ struct OpenMPDeclarativeConstruct {
std::variant
+ OmpDeclareVariantDirective, OpenMPGroupprivate, OpenMPThreadprivate,
+ OpenMPRequiresConstruct, OpenMPUtilityConstruct,
+ OmpMetadirectiveDirective>
u;
};
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fef64ccc15015..ec2ec37e623f8 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3593,6 +3593,13 @@ genOMP(lower::AbstractConverter &converter,
lower::SymMap &symTable,
}
}
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap
&symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ const parser::OpenMPGroupprivate &directive) {
+ TODO(converter.getCurrentLocation(), "GROUPPRIVATE");
+}
+
static void genOMP(lower::AbstractConverter &converter, lower::SymMap
&symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
diff --git a/flang/lib/Parser/openmp-parsers.cpp
b/flang/lib/Parser/openmp-parsers.cpp
index 46b14861096f1..41c16212f5771 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -1773,6 +1773,12 @@
TYPE_PARSER(sourced(construct(
verbatim("DECLARE SIMD"_tok) || verbatim("DECLARE_SIMD"_tok),
maybe(parenthesized(name)), Parser{})))
+TYPE_PARSER(sourced( //
+construct(
+predicated(OmpDirectiveNameParser{},
+IsDirective(llvm::omp::Directive::OMPD_groupprivate)) >=
+Parser{})))
+
// 2.4 Requires construct
TYPE_PARSER(sourced(construct(
verbatim("REQUIRES"_tok), Parser{})))
@@ -1808,6 +1814,8 @@ TYPE_PARSER(
Parser{}) ||
construct(
Parser{}) ||
+construct(
+Parser{}) ||
construct(
Parser{}) ||
construct(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 4f8d498972807..4294a6d491648 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2716,6 +2716,13 @@ class UnparseVisitor {
void Unparse(const OpenMPDispatchConstruct &x) { //
Unparse(static_cast(x));
}
+ void Unparse(const OpenMPGroupprivate &x) {
+BeginOpenMP();
+Word("!$OMP ");
+Walk(x.v);
+Put("\n");
+EndOpenMP();
+ }
v
[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/153801
>From 6e2b170ea709c205ad27b3e326a4d4ade7822f53 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 15 Aug 2025 09:45:46 +0900
Subject: [PATCH] RuntimeLibcalls: Fix building hash table with duplicate
entries
We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.
BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.
As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.
---
llvm/test/TableGen/RuntimeLibcallEmitter.td | 4 +-
.../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 76 ---
2 files changed, 35 insertions(+), 45 deletions(-)
diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td
b/llvm/test/TableGen/RuntimeLibcallEmitter.td
index 7c62402227f7d..2d19d534ec3ef 100644
--- a/llvm/test/TableGen/RuntimeLibcallEmitter.td
+++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td
@@ -176,9 +176,9 @@ def BlahLibrary : SystemRuntimeLibrary
RTLIB::RuntimeLibcallsInfo::lookupLibcallImplNameImpl(StringRef Name) {
// CHECK: static constexpr uint16_t HashTableNameToEnum[16] = {
-// CHECK: 2, // 0x00705301b8, ___memset
+// CHECK: 2,
// CHECK: 0,
-// CHECK: 6, // 0x001417a2af, calloc
+// CHECK: 6,
// CHECK: 0,
// CHECK: };
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index c305e6323ca9d..a8ec873f4587e 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -287,13 +287,6 @@ class RuntimeLibcallEmitter {
void run(raw_ostream &OS);
};
-/// Helper struct for the name hash table.
-struct LookupEntry {
- StringRef FuncName;
- uint64_t Hash = 0;
- unsigned TableValue = 0;
-};
-
} // End anonymous namespace.
void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const {
@@ -339,14 +332,17 @@ static void emitHashFunction(raw_ostream &OS) {
/// Return the table size, maximum number of collisions for the set of hashes
static std::pair
computePerfectHashParameters(ArrayRef Hashes) {
- const int SizeOverhead = 10;
- const int NumHashes = Hashes.size();
+ // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+ const int SizeOverhead = 4;
// Index derived from hash -> number of collisions.
DenseMap Table;
+ unsigned NumHashes = Hashes.size();
+
for (int MaxCollisions = 1;; ++MaxCollisions) {
-for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead *
NumHashes;
+ N <<= 1) {
Table.clear();
bool NeedResize = false;
@@ -365,41 +361,29 @@ computePerfectHashParameters(ArrayRef Hashes) {
}
}
-static std::vector
+static std::vector
constructPerfectHashTable(ArrayRef Keywords,
- ArrayRef Hashes, int Size, int Collisions,
- StringToOffsetTable &OffsetTable) {
- DenseSet Seen;
- std::vector Lookup(Size * Collisions);
-
- for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-// We do not want to add repeated entries for cases with the same name,
only
-// an entry for the first, with the name collision enum values immediately
-// following.
-if (!Seen.insert(ImplName).second)
- continue;
-
-uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
+ ArrayRef Hashes,
+ ArrayRef TableValues, int Size,
+ int Collisions, StringToOffsetTable &OffsetTable) {
+ std::vector Lookup(Size * Collisions);
+ for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
uint64_t Idx = (HashValue % static_cast(Size)) *
static_cast(Collisions);
bool Found = false;
for (int J = 0; J < Collisions; ++J) {
- LookupEntry &Entry = Lookup[Idx + J];
- if (Entry.TableValue == 0) {
-Entry.FuncName = ImplName;
-Entry.TableValue = LibCallImpl.getEnumVal();
-Entry.Hash = HashValue;
+ unsigned &Entry = Lookup[Idx + J];
+ if (Entry == 0) {
+Entry = TableValue;
Found = true;
break;
}
}
if (!Found)
- reportFatalInternalError("failure to hash " + ImplName);
+ reportFatalInternalError("failure to hash");
}
return Lookup;
@@ -409,15 +393,25 @@ constructPerfectHashTable(ArrayRef
Keywords,
void RuntimeLibcallEmitter::emitNameMatchHashTable(
raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
std::vecto
[llvm-branch-commits] [flang] [flang][OpenMP] Parse GROUPPRIVATE directive (PR #153807)
llvmbot wrote:
@llvm/pr-subscribers-flang-fir-hlfir
@llvm/pr-subscribers-flang-openmp
Author: Krzysztof Parzyszek (kparzysz)
Changes
No semantic checks or lowering yet.
---
Full diff: https://github.com/llvm/llvm-project/pull/153807.diff
9 Files Affected:
- (modified) flang/include/flang/Parser/dump-parse-tree.h (+1)
- (modified) flang/include/flang/Parser/parse-tree.h (+12-2)
- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+7)
- (modified) flang/lib/Parser/openmp-parsers.cpp (+8)
- (modified) flang/lib/Parser/unparse.cpp (+7)
- (modified) flang/lib/Semantics/check-omp-structure.cpp (+13)
- (modified) flang/lib/Semantics/check-omp-structure.h (+2)
- (added) flang/test/Lower/OpenMP/Todo/groupprivate.f90 (+9)
- (added) flang/test/Parser/OpenMP/groupprivate.f90 (+30)
```diff
diff --git a/flang/include/flang/Parser/dump-parse-tree.h
b/flang/include/flang/Parser/dump-parse-tree.h
index 2c666a6d09a7b..8fbc6ccc639bf 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -729,6 +729,7 @@ class ParseTreeDumper {
NODE(parser, OpenMPLoopConstruct)
NODE(parser, OpenMPExecutableAllocate)
NODE(parser, OpenMPAllocatorsConstruct)
+ NODE(parser, OpenMPGroupprivate)
NODE(parser, OpenMPRequiresConstruct)
NODE(parser, OpenMPSimpleStandaloneConstruct)
NODE(parser, OpenMPStandaloneConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h
b/flang/include/flang/Parser/parse-tree.h
index e72190f019dd1..ae0259fe9025e 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4943,6 +4943,15 @@ struct OpenMPDeclareSimdConstruct {
std::tuple, OmpClauseList> t;
};
+// ref: [6.0:301-303]
+//
+// groupprivate-directive ->
+//GROUPPRIVATE (variable-list-item...) // since 6.0
+struct OpenMPGroupprivate {
+ WRAPPER_CLASS_BOILERPLATE(OpenMPGroupprivate, OmpDirectiveSpecification);
+ CharBlock source;
+};
+
// 2.4 requires -> REQUIRES requires-clause[ [ [,] requires-clause]...]
struct OpenMPRequiresConstruct {
TUPLE_CLASS_BOILERPLATE(OpenMPRequiresConstruct);
@@ -4970,8 +4979,9 @@ struct OpenMPDeclarativeConstruct {
std::variant
+ OmpDeclareVariantDirective, OpenMPGroupprivate, OpenMPThreadprivate,
+ OpenMPRequiresConstruct, OpenMPUtilityConstruct,
+ OmpMetadirectiveDirective>
u;
};
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fef64ccc15015..ec2ec37e623f8 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3593,6 +3593,13 @@ genOMP(lower::AbstractConverter &converter,
lower::SymMap &symTable,
}
}
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap
&symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ const parser::OpenMPGroupprivate &directive) {
+ TODO(converter.getCurrentLocation(), "GROUPPRIVATE");
+}
+
static void genOMP(lower::AbstractConverter &converter, lower::SymMap
&symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
diff --git a/flang/lib/Parser/openmp-parsers.cpp
b/flang/lib/Parser/openmp-parsers.cpp
index 46b14861096f1..41c16212f5771 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -1773,6 +1773,12 @@
TYPE_PARSER(sourced(construct(
verbatim("DECLARE SIMD"_tok) || verbatim("DECLARE_SIMD"_tok),
maybe(parenthesized(name)), Parser{})))
+TYPE_PARSER(sourced( //
+construct(
+predicated(OmpDirectiveNameParser{},
+IsDirective(llvm::omp::Directive::OMPD_groupprivate)) >=
+Parser{})))
+
// 2.4 Requires construct
TYPE_PARSER(sourced(construct(
verbatim("REQUIRES"_tok), Parser{})))
@@ -1808,6 +1814,8 @@ TYPE_PARSER(
Parser{}) ||
construct(
Parser{}) ||
+construct(
+Parser{}) ||
construct(
Parser{}) ||
construct(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 4f8d498972807..4294a6d491648 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2716,6 +2716,13 @@ class UnparseVisitor {
void Unparse(const OpenMPDispatchConstruct &x) { //
Unparse(static_cast(x));
}
+ void Unparse(const OpenMPGroupprivate &x) {
+BeginOpenMP();
+Word("!$OMP ");
+Walk(x.v);
+Put("\n");
+EndOpenMP();
+ }
void Unparse(const OpenMPRequiresConstruct &y) {
BeginOpenMP();
Word("!$OMP REQUIRES ");
diff --git a/flang/lib/Semantics/check-omp-structure.cpp
b/flang/lib/Semantics/check-omp-structure.cpp
index bf126bbb0d8c1..ea8c391999331 100644
--- a/flang/lib/Semantic
[llvm-branch-commits] [flang] [flang][OpenMP] Parse GROUPPRIVATE directive (PR #153807)
llvmbot wrote:
@llvm/pr-subscribers-flang-parser
Author: Krzysztof Parzyszek (kparzysz)
Changes
No semantic checks or lowering yet.
---
Full diff: https://github.com/llvm/llvm-project/pull/153807.diff
9 Files Affected:
- (modified) flang/include/flang/Parser/dump-parse-tree.h (+1)
- (modified) flang/include/flang/Parser/parse-tree.h (+12-2)
- (modified) flang/lib/Lower/OpenMP/OpenMP.cpp (+7)
- (modified) flang/lib/Parser/openmp-parsers.cpp (+8)
- (modified) flang/lib/Parser/unparse.cpp (+7)
- (modified) flang/lib/Semantics/check-omp-structure.cpp (+13)
- (modified) flang/lib/Semantics/check-omp-structure.h (+2)
- (added) flang/test/Lower/OpenMP/Todo/groupprivate.f90 (+9)
- (added) flang/test/Parser/OpenMP/groupprivate.f90 (+30)
```diff
diff --git a/flang/include/flang/Parser/dump-parse-tree.h
b/flang/include/flang/Parser/dump-parse-tree.h
index 2c666a6d09a7b..8fbc6ccc639bf 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -729,6 +729,7 @@ class ParseTreeDumper {
NODE(parser, OpenMPLoopConstruct)
NODE(parser, OpenMPExecutableAllocate)
NODE(parser, OpenMPAllocatorsConstruct)
+ NODE(parser, OpenMPGroupprivate)
NODE(parser, OpenMPRequiresConstruct)
NODE(parser, OpenMPSimpleStandaloneConstruct)
NODE(parser, OpenMPStandaloneConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h
b/flang/include/flang/Parser/parse-tree.h
index e72190f019dd1..ae0259fe9025e 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4943,6 +4943,15 @@ struct OpenMPDeclareSimdConstruct {
std::tuple, OmpClauseList> t;
};
+// ref: [6.0:301-303]
+//
+// groupprivate-directive ->
+//GROUPPRIVATE (variable-list-item...) // since 6.0
+struct OpenMPGroupprivate {
+ WRAPPER_CLASS_BOILERPLATE(OpenMPGroupprivate, OmpDirectiveSpecification);
+ CharBlock source;
+};
+
// 2.4 requires -> REQUIRES requires-clause[ [ [,] requires-clause]...]
struct OpenMPRequiresConstruct {
TUPLE_CLASS_BOILERPLATE(OpenMPRequiresConstruct);
@@ -4970,8 +4979,9 @@ struct OpenMPDeclarativeConstruct {
std::variant
+ OmpDeclareVariantDirective, OpenMPGroupprivate, OpenMPThreadprivate,
+ OpenMPRequiresConstruct, OpenMPUtilityConstruct,
+ OmpMetadirectiveDirective>
u;
};
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fef64ccc15015..ec2ec37e623f8 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3593,6 +3593,13 @@ genOMP(lower::AbstractConverter &converter,
lower::SymMap &symTable,
}
}
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap
&symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ const parser::OpenMPGroupprivate &directive) {
+ TODO(converter.getCurrentLocation(), "GROUPPRIVATE");
+}
+
static void genOMP(lower::AbstractConverter &converter, lower::SymMap
&symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
diff --git a/flang/lib/Parser/openmp-parsers.cpp
b/flang/lib/Parser/openmp-parsers.cpp
index 46b14861096f1..41c16212f5771 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -1773,6 +1773,12 @@
TYPE_PARSER(sourced(construct(
verbatim("DECLARE SIMD"_tok) || verbatim("DECLARE_SIMD"_tok),
maybe(parenthesized(name)), Parser{})))
+TYPE_PARSER(sourced( //
+construct(
+predicated(OmpDirectiveNameParser{},
+IsDirective(llvm::omp::Directive::OMPD_groupprivate)) >=
+Parser{})))
+
// 2.4 Requires construct
TYPE_PARSER(sourced(construct(
verbatim("REQUIRES"_tok), Parser{})))
@@ -1808,6 +1814,8 @@ TYPE_PARSER(
Parser{}) ||
construct(
Parser{}) ||
+construct(
+Parser{}) ||
construct(
Parser{}) ||
construct(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 4f8d498972807..4294a6d491648 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2716,6 +2716,13 @@ class UnparseVisitor {
void Unparse(const OpenMPDispatchConstruct &x) { //
Unparse(static_cast(x));
}
+ void Unparse(const OpenMPGroupprivate &x) {
+BeginOpenMP();
+Word("!$OMP ");
+Walk(x.v);
+Put("\n");
+EndOpenMP();
+ }
void Unparse(const OpenMPRequiresConstruct &y) {
BeginOpenMP();
Word("!$OMP REQUIRES ");
diff --git a/flang/lib/Semantics/check-omp-structure.cpp
b/flang/lib/Semantics/check-omp-structure.cpp
index bf126bbb0d8c1..ea8c391999331 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang
[llvm-branch-commits] [llvm] RuntimeLibcalls: Fix building hash table with duplicate entries (PR #153801)
https://github.com/arsenm updated
https://github.com/llvm/llvm-project/pull/153801
>From 6e2b170ea709c205ad27b3e326a4d4ade7822f53 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 15 Aug 2025 09:45:46 +0900
Subject: [PATCH] RuntimeLibcalls: Fix building hash table with duplicate
entries
We were sizing the table appropriately for the number of LibcallImpls,
but many of those have identical names which were pushing up the
collision count unnecessarily. This ends up decreasing the table size
slightly, and makes it a bit faster.
BM_LookupRuntimeLibcallByNameRandomCalls improves by ~25% and
BM_LookupRuntimeLibcallByNameSampleData by ~5%.
As a secondary change, align the table size up to the next
power of 2. This makes the table larger than before, but improves
the sample data benchmark by an additional 5%.
---
llvm/test/TableGen/RuntimeLibcallEmitter.td | 4 +-
.../TableGen/Basic/RuntimeLibcallsEmitter.cpp | 76 ---
2 files changed, 35 insertions(+), 45 deletions(-)
diff --git a/llvm/test/TableGen/RuntimeLibcallEmitter.td
b/llvm/test/TableGen/RuntimeLibcallEmitter.td
index 7c62402227f7d..2d19d534ec3ef 100644
--- a/llvm/test/TableGen/RuntimeLibcallEmitter.td
+++ b/llvm/test/TableGen/RuntimeLibcallEmitter.td
@@ -176,9 +176,9 @@ def BlahLibrary : SystemRuntimeLibrary
RTLIB::RuntimeLibcallsInfo::lookupLibcallImplNameImpl(StringRef Name) {
// CHECK: static constexpr uint16_t HashTableNameToEnum[16] = {
-// CHECK: 2, // 0x00705301b8, ___memset
+// CHECK: 2,
// CHECK: 0,
-// CHECK: 6, // 0x001417a2af, calloc
+// CHECK: 6,
// CHECK: 0,
// CHECK: };
diff --git a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
index c305e6323ca9d..a8ec873f4587e 100644
--- a/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
+++ b/llvm/utils/TableGen/Basic/RuntimeLibcallsEmitter.cpp
@@ -287,13 +287,6 @@ class RuntimeLibcallEmitter {
void run(raw_ostream &OS);
};
-/// Helper struct for the name hash table.
-struct LookupEntry {
- StringRef FuncName;
- uint64_t Hash = 0;
- unsigned TableValue = 0;
-};
-
} // End anonymous namespace.
void RuntimeLibcallEmitter::emitGetRuntimeLibcallEnum(raw_ostream &OS) const {
@@ -339,14 +332,17 @@ static void emitHashFunction(raw_ostream &OS) {
/// Return the table size, maximum number of collisions for the set of hashes
static std::pair
computePerfectHashParameters(ArrayRef Hashes) {
- const int SizeOverhead = 10;
- const int NumHashes = Hashes.size();
+ // Chosen based on experimentation with llvm/benchmarks/RuntimeLibcalls.cpp
+ const int SizeOverhead = 4;
// Index derived from hash -> number of collisions.
DenseMap Table;
+ unsigned NumHashes = Hashes.size();
+
for (int MaxCollisions = 1;; ++MaxCollisions) {
-for (int N = NumHashes; N < SizeOverhead * NumHashes; ++N) {
+for (unsigned N = NextPowerOf2(NumHashes - 1); N < SizeOverhead *
NumHashes;
+ N <<= 1) {
Table.clear();
bool NeedResize = false;
@@ -365,41 +361,29 @@ computePerfectHashParameters(ArrayRef Hashes) {
}
}
-static std::vector
+static std::vector
constructPerfectHashTable(ArrayRef Keywords,
- ArrayRef Hashes, int Size, int Collisions,
- StringToOffsetTable &OffsetTable) {
- DenseSet Seen;
- std::vector Lookup(Size * Collisions);
-
- for (const RuntimeLibcallImpl &LibCallImpl : Keywords) {
-StringRef ImplName = LibCallImpl.getLibcallFuncName();
-
-// We do not want to add repeated entries for cases with the same name,
only
-// an entry for the first, with the name collision enum values immediately
-// following.
-if (!Seen.insert(ImplName).second)
- continue;
-
-uint64_t HashValue = Hashes[LibCallImpl.getEnumVal() - 1];
+ ArrayRef Hashes,
+ ArrayRef TableValues, int Size,
+ int Collisions, StringToOffsetTable &OffsetTable) {
+ std::vector Lookup(Size * Collisions);
+ for (auto [HashValue, TableValue] : zip(Hashes, TableValues)) {
uint64_t Idx = (HashValue % static_cast(Size)) *
static_cast(Collisions);
bool Found = false;
for (int J = 0; J < Collisions; ++J) {
- LookupEntry &Entry = Lookup[Idx + J];
- if (Entry.TableValue == 0) {
-Entry.FuncName = ImplName;
-Entry.TableValue = LibCallImpl.getEnumVal();
-Entry.Hash = HashValue;
+ unsigned &Entry = Lookup[Idx + J];
+ if (Entry == 0) {
+Entry = TableValue;
Found = true;
break;
}
}
if (!Found)
- reportFatalInternalError("failure to hash " + ImplName);
+ reportFatalInternalError("failure to hash");
}
return Lookup;
@@ -409,15 +393,25 @@ constructPerfectHashTable(ArrayRef
Keywords,
void RuntimeLibcallEmitter::emitNameMatchHashTable(
raw_ostream &OS, StringToOffsetTable &OffsetTable) const {
std::vecto
[llvm-branch-commits] [clang] [LifetimeSafety] Prevent duplicate loans and statement visits (PR #153661)
https://github.com/Xazax-hun commented: I am wondering if this is the right approach. If everything works out well, every time we call `Visit` on an expression, there should be a guarantee we already visited all the subexpressions of it (modulo some corner cases with short circuiting operators, ternaries and trivially false branches). So, we might be able to structure the code in a way that we never need to call `Visit` recursively for a subexpression, and we do not need to keep a `VisitedStmts` set. But in case that does not work out for some reason I am also fine with this approach. https://github.com/llvm/llvm-project/pull/153661 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][PAC][ObjC] Merge the block metadata support for the arm64e abi to llvm 21 (PR #153725)
@@ -155,6 +155,7 @@ FEATURE(ptrauth_vtable_pointer_address_discrimination, LangOpts.PointerAuthVTPtr FEATURE(ptrauth_vtable_pointer_type_discrimination, LangOpts.PointerAuthVTPtrTypeDiscrimination) FEATURE(ptrauth_type_info_vtable_pointer_discrimination, LangOpts.PointerAuthTypeInfoVTPtrDiscrimination) FEATURE(ptrauth_member_function_pointer_type_discrimination, LangOpts.PointerAuthCalls) +FEATURE(ptrauth_signed_block_descriptors, LangOpts.PointerAuthBlockDescriptorPointers) AaronBallman wrote: This follows the pattern of the other ptrauth work but none of these should have been exposed as features to begin with... https://github.com/llvm/llvm-project/pull/153725 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][PAC][ObjC] Merge the block metadata support for the arm64e abi to llvm 21 (PR #153725)
https://github.com/AaronBallman edited https://github.com/llvm/llvm-project/pull/153725 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][PAC][ObjC] Merge the block metadata support for the arm64e abi to llvm 21 (PR #153725)
https://github.com/AaronBallman commented: Not opposed but this is a pretty significant amount of change for being this late in the rc cycles, and the changes haven't been upstream for very long. How risky are these changes? https://github.com/llvm/llvm-project/pull/153725 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: --filter-out-after "^middle.block:" --filter-out-after "^scalar.ph:"
--version 4
+; RUN: opt -S -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2
-passes=loop-vectorize,instcombine,early-cse
-prefer-predicate-over-epilogue=predicate-dont-vectorize
-force-vector-interleave=1 %s | FileCheck %s
+
+define dso_local void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i64 %n) {
SamTebbs33 wrote:
Done, let me know if anything needs to be added or changed with them.
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -2030,32 +2031,68 @@ Value *llvm::addDiffRuntimeChecks(
// Map to keep track of created compares, The key is the pair of operands for
// the compare, to allow detecting and re-using redundant compares.
DenseMap, Value *> SeenCompares;
- for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
+ Value *AliasLaneMask = nullptr;
+ for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze,
+WriteAfterRead] : Checks) {
Type *Ty = SinkStart->getType();
-// Compute VF * IC * AccessSize.
-auto *VFTimesICTimesSize =
-ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
- ConstantInt::get(Ty, IC * AccessSize));
-Value *Diff =
-Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
-
-// Check if the same compare has already been created earlier. In that
case,
-// there is no need to check it again.
-Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
-if (IsConflict)
- continue;
+if (!VF.isScalar() && UseSafeEltsMask) {
+ Value *Sink = Expander.expandCodeFor(SinkStart, Ty, Loc);
+ Value *Src = Expander.expandCodeFor(SrcStart, Ty, Loc);
+ unsigned IntOpc = WriteAfterRead ? Intrinsic::loop_dependence_war_mask
+ : Intrinsic::loop_dependence_raw_mask;
+ Value *SourceAsPtr = ChkBuilder.CreateCast(Instruction::IntToPtr, Src,
+ ChkBuilder.getPtrTy());
+ Value *SinkAsPtr = ChkBuilder.CreateCast(Instruction::IntToPtr, Sink,
+ ChkBuilder.getPtrTy());
+ Value *M = ChkBuilder.CreateIntrinsic(
+ IntOpc, {VectorType::get(ChkBuilder.getInt1Ty(), VF)},
+ {SourceAsPtr, SinkAsPtr, ChkBuilder.getInt64(AccessSize)}, nullptr,
+ "alias.lane.mask");
+ if (AliasLaneMask)
+M = ChkBuilder.CreateAnd(AliasLaneMask, M);
+ else
+AliasLaneMask = M;
+} else {
+ // Compute VF * IC * AccessSize.
+ auto *VFTimesICTimesSize =
+ ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
+ ConstantInt::get(Ty, IC * AccessSize));
+ Value *Diff =
+ Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty,
Loc);
+
+ // Check if the same compare has already been created earlier. In that
+ // case, there is no need to check it again.
+ Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
SamTebbs33 wrote:
That should help, done.
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -2063,6 +2080,12 @@ static bool
useActiveLaneMaskForControlFlow(TailFoldingStyle Style) {
Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
}
+static bool useSafeEltsMask(TailFoldingStyle TFStyle, RTCheckStyle Style,
+ElementCount VF, const TargetTransformInfo &TTI) {
+ return useActiveLaneMask(TFStyle) && Style == RTCheckStyle::UseSafeEltsMask
&&
+ TTI.useSafeEltsMask(VF);
+}
+
SamTebbs33 wrote:
Done.
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -0,0 +1,126 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^middle.block:" --filter-out-after "^scalar.ph:" --version 4 +; RUN: opt -S -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 -passes=loop-vectorize,instcombine,early-cse -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-vector-interleave=1 %s | FileCheck %s SamTebbs33 wrote: Sounds sensible to me, done. https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -1347,6 +1354,11 @@ class TargetTransformInfo {
PartialReductionExtendKind OpBExtend, std::optional BinOp,
TTI::TargetCostKind CostKind) const;
+ /// \return true if a mask should be formed that disables lanes that could
+ /// alias between two pointers. The mask is created by the
+ /// loop_dependence_{war,raw}_mask intrinsics.
+ LLVM_ABI bool useSafeEltsMask(ElementCount VF) const;
SamTebbs33 wrote:
That would work if I return invalid for the cost, but @sdesmalen-arm has
suggested I calculate the cost of the expanded intrinsic instead of returning
invalid: https://github.com/llvm/llvm-project/pull/100579/files#r2262969228
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -482,11 +482,14 @@ bool RuntimePointerChecking::tryToCreateDiffCheck( } } + bool WriteAfterRead = isa(SrcInsts[0]); SamTebbs33 wrote: That's better, thanks. https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -974,6 +974,11 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, } break; } + case Intrinsic::loop_dependence_raw_mask: + case Intrinsic::loop_dependence_war_mask: +if (ST->hasSVE2()) SamTebbs33 wrote: Done. https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -974,6 +974,11 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, } break; } + case Intrinsic::loop_dependence_raw_mask: + case Intrinsic::loop_dependence_war_mask: +if (ST->hasSVE2()) + return 1; +return InstructionCost::getInvalid(CostKind); SamTebbs33 wrote: It will now get the expanded intrinsic cost instead of returning invalid. https://github.com/llvm/llvm-project/pull/100579 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -5535,6 +5540,11 @@ InstructionCost AArch64TTIImpl::getPartialReductionCost(
return Cost;
}
+bool AArch64TTIImpl::useSafeEltsMask(ElementCount VF) const {
+ // The whilewr/rw instructions require SVE2
+ return ST->hasSVE2();
SamTebbs33 wrote:
Done.
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -201,6 +201,13 @@ enum class TailFoldingStyle {
DataWithEVL,
};
+enum class RTCheckStyle {
+ /// Create runtime checks based on the difference between two pointers
+ ScalarDifference,
+ /// Form a mask based on elements which won't be a WAR or RAW hazard.
+ UseSafeEltsMask,
SamTebbs33 wrote:
I think `NoUnsafeAliasMask` could sound like it means there shouldn't be a
mask, i.e. `No{...}Mask`.
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -2030,32 +2031,68 @@ Value *llvm::addDiffRuntimeChecks(
// Map to keep track of created compares, The key is the pair of operands for
// the compare, to allow detecting and re-using redundant compares.
DenseMap, Value *> SeenCompares;
- for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
+ Value *AliasLaneMask = nullptr;
+ for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze,
+WriteAfterRead] : Checks) {
Type *Ty = SinkStart->getType();
-// Compute VF * IC * AccessSize.
-auto *VFTimesICTimesSize =
-ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
- ConstantInt::get(Ty, IC * AccessSize));
-Value *Diff =
-Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
-
-// Check if the same compare has already been created earlier. In that
case,
-// there is no need to check it again.
-Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
-if (IsConflict)
- continue;
+if (!VF.isScalar() && UseSafeEltsMask) {
SamTebbs33 wrote:
I think that `VF.isScalar()` is actually an unnecessary check since the memory
check block is only used if the LV actually ends up vectorising or if epilogue
vectorisation is on. In the case of epilogue vectorisation it won't use tail
predication and tail predication being off turns off `UseSafeEltsMask`.
As per Florian's suggestion I've separated this into two functions.
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -1347,6 +1354,11 @@ class TargetTransformInfo {
PartialReductionExtendKind OpBExtend, std::optional BinOp,
TTI::TargetCostKind CostKind) const;
+ /// \return true if a mask should be formed that disables lanes that could
+ /// alias between two pointers. The mask is created by the
+ /// loop_dependence_{war,raw}_mask intrinsics.
+ LLVM_ABI bool useSafeEltsMask(ElementCount VF) const;
SamTebbs33 wrote:
That's a holdover from a previous prototype, removed.
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Mask off possibly aliasing vector lanes (PR #100579)
@@ -2421,7 +2444,6 @@ void
InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
LoopVectorPreHeader = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(),
static_cast(nullptr), LI,
nullptr, "vector.ph");
-
SamTebbs33 wrote:
Done.
https://github.com/llvm/llvm-project/pull/100579
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)
https://github.com/rampitec updated
https://github.com/llvm/llvm-project/pull/153881
>From 4ee7e8bf3a0cd4036b601a12bbb5bba61deda993 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Fri, 15 Aug 2025 14:18:56 -0700
Subject: [PATCH] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI.
Co-authored-by: Stephen Thomas
---
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index fa3ca27a5f47c..49a681efc79c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1749,7 +1749,7 @@ bool
GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(0x0fff);
+ .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));
return true;
}
@@ -1799,7 +1799,7 @@ bool
GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- I.getOperand(0).getImm() == 0x0fff))
+ AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
return HazardExpired;
// Track registers writes
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)
https://github.com/rampitec updated
https://github.com/llvm/llvm-project/pull/153880
>From df0ab0abe2132a729ec1ad18f20faa9b804f0a6f Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Fri, 15 Aug 2025 14:10:55 -0700
Subject: [PATCH] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12
waitcount instructions
This simply updates the pass's cognizance of these instructions, and for the
most part the hazards where they might be encountered do not exist for gfx12.
Nonetheless, encountering them has to be checked for as doing so would indicate
a compiler error.
Co-authored-by: Stephen Thomas
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 19 +++
1 file changed, 19 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c1cca063aac6f..fa3ca27a5f47c 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1357,6 +1357,16 @@ bool
GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
// DsCnt corresponds to LGKMCnt here.
return (Decoded.DsCnt == 0);
}
+ case AMDGPU::S_WAIT_STORECNT:
+ case AMDGPU::S_WAIT_STORECNT_DSCNT:
+ case AMDGPU::S_WAIT_LOADCNT:
+ case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+ case AMDGPU::S_WAIT_SAMPLECNT:
+ case AMDGPU::S_WAIT_BVHCNT:
+ case AMDGPU::S_WAIT_DSCNT:
+ case AMDGPU::S_WAIT_EXPCNT:
+ case AMDGPU::S_WAIT_KMCNT:
+llvm_unreachable("unexpected wait count instruction");
default:
// SOPP instructions cannot mitigate the hazard.
if (TII->isSOPP(MI))
@@ -2254,6 +2264,15 @@ int
GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
case AMDGPU::S_WAITCNT_EXPCNT:
case AMDGPU::S_WAITCNT_LGKMCNT:
case AMDGPU::S_WAIT_IDLE:
+case AMDGPU::S_WAIT_LOADCNT:
+case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+case AMDGPU::S_WAIT_SAMPLECNT:
+case AMDGPU::S_WAIT_BVHCNT:
+case AMDGPU::S_WAIT_STORECNT:
+case AMDGPU::S_WAIT_STORECNT_DSCNT:
+case AMDGPU::S_WAIT_EXPCNT:
+case AMDGPU::S_WAIT_DSCNT:
+case AMDGPU::S_WAIT_KMCNT:
return true;
default:
break;
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)
https://github.com/rampitec updated
https://github.com/llvm/llvm-project/pull/153880
>From df0ab0abe2132a729ec1ad18f20faa9b804f0a6f Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Fri, 15 Aug 2025 14:10:55 -0700
Subject: [PATCH] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12
waitcount instructions
This simply updates the pass's cognizance of these instructions, and for the
most part the hazards where they might be encountered do not exist for gfx12.
Nonetheless, encountering them has to be checked for as doing so would indicate
a compiler error.
Co-authored-by: Stephen Thomas
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 19 +++
1 file changed, 19 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c1cca063aac6f..fa3ca27a5f47c 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1357,6 +1357,16 @@ bool
GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
// DsCnt corresponds to LGKMCnt here.
return (Decoded.DsCnt == 0);
}
+ case AMDGPU::S_WAIT_STORECNT:
+ case AMDGPU::S_WAIT_STORECNT_DSCNT:
+ case AMDGPU::S_WAIT_LOADCNT:
+ case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+ case AMDGPU::S_WAIT_SAMPLECNT:
+ case AMDGPU::S_WAIT_BVHCNT:
+ case AMDGPU::S_WAIT_DSCNT:
+ case AMDGPU::S_WAIT_EXPCNT:
+ case AMDGPU::S_WAIT_KMCNT:
+llvm_unreachable("unexpected wait count instruction");
default:
// SOPP instructions cannot mitigate the hazard.
if (TII->isSOPP(MI))
@@ -2254,6 +2264,15 @@ int
GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
case AMDGPU::S_WAITCNT_EXPCNT:
case AMDGPU::S_WAITCNT_LGKMCNT:
case AMDGPU::S_WAIT_IDLE:
+case AMDGPU::S_WAIT_LOADCNT:
+case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+case AMDGPU::S_WAIT_SAMPLECNT:
+case AMDGPU::S_WAIT_BVHCNT:
+case AMDGPU::S_WAIT_STORECNT:
+case AMDGPU::S_WAIT_STORECNT_DSCNT:
+case AMDGPU::S_WAIT_EXPCNT:
+case AMDGPU::S_WAIT_DSCNT:
+case AMDGPU::S_WAIT_KMCNT:
return true;
default:
break;
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)
https://github.com/rampitec updated
https://github.com/llvm/llvm-project/pull/153881
>From 4ee7e8bf3a0cd4036b601a12bbb5bba61deda993 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Fri, 15 Aug 2025 14:18:56 -0700
Subject: [PATCH] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI.
Co-authored-by: Stephen Thomas
---
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index fa3ca27a5f47c..49a681efc79c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1749,7 +1749,7 @@ bool
GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(0x0fff);
+ .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));
return true;
}
@@ -1799,7 +1799,7 @@ bool
GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- I.getOperand(0).getImm() == 0x0fff))
+ AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
return HazardExpired;
// Track registers writes
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 7aa3dbc - Revert "Remember LLVM_ENABLE_LIBCXX setting in installed configuration (#139712)"
Author: gulfemsavrun
Date: 2025-08-15T16:00:12-07:00
New Revision: 7aa3dbcae91ade86e362fa6ef6a739f839490cbd
URL:
https://github.com/llvm/llvm-project/commit/7aa3dbcae91ade86e362fa6ef6a739f839490cbd
DIFF:
https://github.com/llvm/llvm-project/commit/7aa3dbcae91ade86e362fa6ef6a739f839490cbd.diff
LOG: Revert "Remember LLVM_ENABLE_LIBCXX setting in installed configuration
(#139712)"
This reverts commit b010b7ea89fdb870024b94913b2b784ce1f4f8d4.
Added:
Modified:
llvm/cmake/modules/HandleLLVMStdlib.cmake
llvm/cmake/modules/LLVMConfig.cmake.in
Removed:
diff --git a/llvm/cmake/modules/HandleLLVMStdlib.cmake
b/llvm/cmake/modules/HandleLLVMStdlib.cmake
index dda1caa846dcb..a7e138aa0789b 100644
--- a/llvm/cmake/modules/HandleLLVMStdlib.cmake
+++ b/llvm/cmake/modules/HandleLLVMStdlib.cmake
@@ -2,7 +2,6 @@
# if the user has requested it.
include(DetermineGCCCompatible)
-include(CheckIncludeFiles)
if(NOT DEFINED LLVM_STDLIB_HANDLED)
set(LLVM_STDLIB_HANDLED ON)
@@ -20,17 +19,7 @@ if(NOT DEFINED LLVM_STDLIB_HANDLED)
if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
check_cxx_compiler_flag("-stdlib=libc++" CXX_COMPILER_SUPPORTS_STDLIB)
check_linker_flag(CXX "-stdlib=libc++" CXX_LINKER_SUPPORTS_STDLIB)
-
- # Check whether C++ include files are available
- # runtimes/CMakeLists.txt adds -nostdlib++ and -nostdinc++ to
- # CMAKE_REQUIRED_FLAGS, which are incompatible with -stdlib=libc++; use
- # a fresh CMAKE_REQUIRED_FLAGS environment.
- cmake_push_check_state(RESET)
- set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -stdlib=libc++")
- check_include_files("chrono" CXX_COMPILER_SUPPORTS_STDLIB_CHRONO
LANGUAGE CXX)
- cmake_pop_check_state()
-
- if(CXX_COMPILER_SUPPORTS_STDLIB AND CXX_LINKER_SUPPORTS_STDLIB AND
CXX_COMPILER_SUPPORTS_STDLIB_CHRONO)
+ if(CXX_COMPILER_SUPPORTS_STDLIB AND CXX_LINKER_SUPPORTS_STDLIB)
append("-stdlib=libc++"
CMAKE_CXX_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS
CMAKE_MODULE_LINKER_FLAGS)
diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in
b/llvm/cmake/modules/LLVMConfig.cmake.in
index c39c33f0c7793..c15b9576cd5d5 100644
--- a/llvm/cmake/modules/LLVMConfig.cmake.in
+++ b/llvm/cmake/modules/LLVMConfig.cmake.in
@@ -55,8 +55,6 @@ endif()
set(LLVM_ENABLE_RTTI @LLVM_ENABLE_RTTI@)
-set(LLVM_ENABLE_LIBCXX @LLVM_ENABLE_LIBCXX@)
-
set(LLVM_ENABLE_LIBEDIT @HAVE_LIBEDIT@)
if(LLVM_ENABLE_LIBEDIT)
find_package(LibEdit)
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CAS] Add MappedFileRegionBumpPtr (PR #114099)
https://github.com/cachemeifyoucan updated https://github.com/llvm/llvm-project/pull/114099 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CAS] Add MappedFileRegionBumpPtr (PR #114099)
https://github.com/cachemeifyoucan updated https://github.com/llvm/llvm-project/pull/114099 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)
https://github.com/changpeng approved this pull request. https://github.com/llvm/llvm-project/pull/153881 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)
https://github.com/changpeng approved this pull request. https://github.com/llvm/llvm-project/pull/153880 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (PR #153879)
https://github.com/changpeng approved this pull request. https://github.com/llvm/llvm-project/pull/153879 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV] Track Linker Relaxable through Assembly Relaxation (PR #153670)
https://github.com/lenary edited https://github.com/llvm/llvm-project/pull/153670 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Github] Drop llvm-project-tests (PR #153877)
llvmbot wrote:
@llvm/pr-subscribers-github-workflow
Author: Aiden Grossman (boomanaiden154)
Changes
All users of this have been cleaned up so we can now drop it fully.
---
Full diff: https://github.com/llvm/llvm-project/pull/153877.diff
2 Files Affected:
- (removed) .github/workflows/llvm-project-tests.yml (-149)
- (removed) .github/workflows/llvm-project-workflow-tests.yml (-32)
``diff
diff --git a/.github/workflows/llvm-project-tests.yml
b/.github/workflows/llvm-project-tests.yml
deleted file mode 100644
index 8621a3b59218e..0
--- a/.github/workflows/llvm-project-tests.yml
+++ /dev/null
@@ -1,149 +0,0 @@
-name: LLVM Project Tests
-
-permissions:
- contents: read
-
-on:
- workflow_dispatch:
-inputs:
- build_target:
-required: false
- projects:
-required: false
- extra_cmake_args:
-required: false
- os_list:
-required: false
-default: '["ubuntu-24.04", "windows-2019", "macOS-13"]'
- python_version:
-required: false
-type: string
-default: '3.11'
- workflow_call:
-inputs:
- build_target:
-required: false
-type: string
-default: "all"
-
- projects:
-required: true
-type: string
-
- extra_cmake_args:
-required: false
-type: string
-
- os_list:
-required: false
-type: string
-# Use windows-2019 due to:
-#
https://developercommunity.visualstudio.com/t/Prev-Issue---with-__assume-isnan-/1597317
-default: '["ubuntu-24.04", "windows-2019", "macOS-13"]'
-
- python_version:
-required: false
-type: string
-default: '3.11'
-
-concurrency:
- # Skip intermediate builds: always.
- # Cancel intermediate builds: only if it is a pull request build.
- # If the group name here is the same as the group name in the workflow that
includes
- # this one, then the action will try to wait on itself and get stuck.
- group: llvm-project-${{ github.workflow }}-${{ inputs.projects }}-${{
inputs.python_version }}${{ github.ref }}
- cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
-
-jobs:
- lit-tests:
-name: Lit Tests
-runs-on: ${{ matrix.os }}
-container:
- image: ${{(startsWith(matrix.os, 'ubuntu') &&
'ghcr.io/llvm/ci-ubuntu-24.04:latest') || null}}
- volumes:
-- /mnt/:/mnt/
-strategy:
- fail-fast: false
- matrix:
-os: ${{ fromJSON(inputs.os_list) }}
-steps:
- - name: Setup Windows
-if: startsWith(matrix.os, 'windows')
-uses: llvm/actions/setup-windows@main
-with:
- arch: amd64
- # On Windows, starting with win19/20220814.1, cmake choose the 32-bit
- # python3.10.6 libraries instead of the 64-bit libraries when building
- # lldb. Using this setup-python action to make 3.10 the default
- # python fixes this.
- - name: Setup Python
-uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 #
v5.4.0
-with:
- python-version: ${{ inputs.python_version }}
- - name: Install Ninja
-if: runner.os != 'Linux'
-uses: llvm/actions/install-ninja@main
- # actions/checkout deletes any existing files in the new git directory,
- # so this needs to either run before ccache-action or it has to use
- # clean: false.
- - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 #
v5.0.0
-with:
- fetch-depth: 250
- - name: Setup ccache
-uses:
hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17
-with:
- # A full build of llvm, clang, lld, and lldb takes about 250MB
- # of ccache space. There's not much reason to have more than this,
- # because we usually won't need to save cache entries from older
- # builds. Also, there is an overall 10GB cache limit, and each
- # run creates a new cache entry so we want to ensure that we have
- # enough cache space for all the tests to run at once and still
- # fit under the 10 GB limit.
- # Default to 2G to workaround:
https://github.com/hendrikmuhs/ccache-action/issues/174
- max-size: 2G
- key: ${{ matrix.os }}
- variant: sccache
- - name: Build and Test
-env:
- # Workaround for
https://github.com/actions/virtual-environments/issues/5900.
- # This should be a no-op for non-mac OSes
- PKG_CONFIG_PATH:
/usr/local/Homebrew/Library/Homebrew/os/mac/pkgconfig//12
-shell: bash
-id: build-llvm
-run: |
- if [ "${{ runner.os }}" == "Linux" ]; then
-builddir="/mnt/build/"
-sudo mkdir -p $builddir
-sudo chown gha $builddir
-extra_cmake_args="-DCMAKE_CXX_COMPILER=clang++
-DCMAKE_C_COMPILER=clang"
- else
-builddir="$(pwd)"/build
[llvm-branch-commits] [Github] Remove call to llvm-project-tests from libclang tests (PR #153876)
llvmbot wrote:
@llvm/pr-subscribers-github-workflow
Author: Aiden Grossman (boomanaiden154)
Changes
This allows for removing llvm-project-tests.yml. This significantly
reduces the complexity of this workflow (including the complexity of
llvm-project-tests.yml) at the cost of a little bit of duplication with
the other workflows that were also using llvm-project-tests.yml.
---
Full diff: https://github.com/llvm/llvm-project/pull/153876.diff
1 Files Affected:
- (modified) .github/workflows/libclang-python-tests.yml (+27-8)
``diff
diff --git a/.github/workflows/libclang-python-tests.yml
b/.github/workflows/libclang-python-tests.yml
index 50ef4acf2feb1..edd2f774621b6 100644
--- a/.github/workflows/libclang-python-tests.yml
+++ b/.github/workflows/libclang-python-tests.yml
@@ -25,17 +25,36 @@ on:
jobs:
check-clang-python:
# Build libclang and then run the libclang Python binding's unit tests.
+# There is an issue running on "windows-2019".
+# See
https://github.com/llvm/llvm-project/issues/76601#issuecomment-1873049082.
name: Build and run Python unit tests
if: github.repository == 'llvm/llvm-project'
+runs-on: ubuntu-24.04
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.13"]
-uses: ./.github/workflows/llvm-project-tests.yml
-with:
- build_target: check-clang-python
- projects: clang
- # There is an issue running on "windows-2019".
- # See
https://github.com/llvm/llvm-project/issues/76601#issuecomment-1873049082.
- os_list: '["ubuntu-24.04"]'
- python_version: ${{ matrix.python-version }}
+steps:
+ - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 #
v5.0.0
+ - name: Setup Python
+uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 #
v5.4.0
+with:
+ python-version: ${{ matrix.python_version }}
+ - name: Setup ccache
+uses:
hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17
+with:
+ max-size: 2G
+ key: spirv-ubuntu-24.04
+ variant: sccache
+ - name: Build and Test
+run: |
+ mkdir build
+ cmake -GNinja \
+-S llvm \
+-B build \
+-DCMAKE_BUILD_TYPE=Release \
+-DLLVM_ENABLE_ASSERTIONS=ON \
+-DCMAKE_C_COMPILER_LAUNCHER=sccache \
+-DCMAKE_CXX_COMPILER_LAUNCHER=sccache \
+-DLLVM_ENABLE_PROJECTS=clang
+ ninja -C build check-clang-python
``
https://github.com/llvm/llvm-project/pull/153876
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Github] Drop llvm-project-tests (PR #153877)
https://github.com/boomanaiden154 created https://github.com/llvm/llvm-project/pull/153877 All users of this have been cleaned up so we can now drop it fully. ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)
https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/153873 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)
https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/153873 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (PR #153879)
https://github.com/rampitec created
https://github.com/llvm/llvm-project/pull/153879
None
>From 32fc4952dafa723bdff1f26f717b87cd8f4464b1 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Fri, 15 Aug 2025 14:03:15 -0700
Subject: [PATCH] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE
register
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 12 +
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++
llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir | 54 +++
4 files changed, 71 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index dd7c1914d3440..c1cca063aac6f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1206,6 +1206,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixDsAtomicAsyncBarrierArriveB64(MI);
if (ST.hasScratchBaseForwardingHazard())
fixScratchBaseForwardingHazard(MI);
+ if (ST.setRegModeNeedsVNOPs())
+fixSetRegMode(MI);
}
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo
&TRI,
@@ -3546,3 +3548,13 @@ bool
GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) {
AMDGPU::DepCtr::encodeFieldSaSdst(0), 0));
return true;
}
+
+bool GCNHazardRecognizer::fixSetRegMode(MachineInstr *MI) {
+ if (!isSSetReg(MI->getOpcode()) ||
+ MI->getOperand(1).getImm() != AMDGPU::Hwreg::ID_MODE)
+return false;
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index e0982b46424b9..67beffadc0913 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -113,6 +113,7 @@ class GCNHazardRecognizer final : public
ScheduleHazardRecognizer {
bool fixGetRegWaitIdle(MachineInstr *MI);
bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
bool fixScratchBaseForwardingHazard(MachineInstr *MI);
+ bool fixSetRegMode(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 404a476a3076a..2a8385df3f934 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1345,6 +1345,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
+ bool setRegModeNeedsVNOPs() const {
+return GFX1250Insts && getGeneration() == GFX12;
+ }
+
/// Return if operations acting on VGPR tuples require even alignment.
bool needsAlignedVGPRs() const { return GFX90AInsts || GFX1250Insts; }
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
index f4596b0832d97..170478539d8a9 100644
--- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -493,3 +493,57 @@ body: |
liveins: $vgpr0
$vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
...
+
+---
+name: s_setreg_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+liveins: $sgpr0
+; GCN-LABEL: name: s_setreg_b32_hwreg_mode
+; GCN: liveins: $sgpr0
+; GCN-NEXT: {{ $}}
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_b32_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+liveins: $sgpr0
+; GCN-LABEL: name: s_setreg_b32_mode
+; GCN: liveins: $sgpr0
+; GCN-NEXT: {{ $}}
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+; GCN-LABEL: name: s_setreg_imm32_b32_hwreg_mode
+; GCN: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+; GCN-LABEL: name: s_setreg_imm32_b32_mode
+; GCN: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit
$mode
+S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit $mode
+...
_
[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)
https://github.com/rampitec created
https://github.com/llvm/llvm-project/pull/153880
This simply updates the pass's cognizance of these instructions, and for the
most part the hazards where they might be encountered do not exist for gfx12.
Nonetheless, encountering them has to be checked for as doing so would indicate
a compiler error.
Co-authored-by: Stephen Thomas
>From 2f96c402497f80f8d31e4229f03b3ef8dd88cf4d Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Fri, 15 Aug 2025 14:10:55 -0700
Subject: [PATCH] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12
waitcount instructions
This simply updates the pass's cognizance of these instructions, and for the
most part the hazards where they might be encountered do not exist for gfx12.
Nonetheless, encountering them has to be checked for as doing so would indicate
a compiler error.
Co-authored-by: Stephen Thomas
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 19 +++
1 file changed, 19 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index c1cca063aac6f..fa3ca27a5f47c 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1357,6 +1357,16 @@ bool
GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
// DsCnt corresponds to LGKMCnt here.
return (Decoded.DsCnt == 0);
}
+ case AMDGPU::S_WAIT_STORECNT:
+ case AMDGPU::S_WAIT_STORECNT_DSCNT:
+ case AMDGPU::S_WAIT_LOADCNT:
+ case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+ case AMDGPU::S_WAIT_SAMPLECNT:
+ case AMDGPU::S_WAIT_BVHCNT:
+ case AMDGPU::S_WAIT_DSCNT:
+ case AMDGPU::S_WAIT_EXPCNT:
+ case AMDGPU::S_WAIT_KMCNT:
+llvm_unreachable("unexpected wait count instruction");
default:
// SOPP instructions cannot mitigate the hazard.
if (TII->isSOPP(MI))
@@ -2254,6 +2264,15 @@ int
GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
case AMDGPU::S_WAITCNT_EXPCNT:
case AMDGPU::S_WAITCNT_LGKMCNT:
case AMDGPU::S_WAIT_IDLE:
+case AMDGPU::S_WAIT_LOADCNT:
+case AMDGPU::S_WAIT_LOADCNT_DSCNT:
+case AMDGPU::S_WAIT_SAMPLECNT:
+case AMDGPU::S_WAIT_BVHCNT:
+case AMDGPU::S_WAIT_STORECNT:
+case AMDGPU::S_WAIT_STORECNT_DSCNT:
+case AMDGPU::S_WAIT_EXPCNT:
+case AMDGPU::S_WAIT_DSCNT:
+case AMDGPU::S_WAIT_KMCNT:
return true;
default:
break;
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (PR #153879)
rampitec wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/153879?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#153881** https://app.graphite.dev/github/pr/llvm/llvm-project/153881?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#153880** https://app.graphite.dev/github/pr/llvm/llvm-project/153880?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#153879** https://app.graphite.dev/github/pr/llvm/llvm-project/153879?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/153879?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#153878** https://app.graphite.dev/github/pr/llvm/llvm-project/153878?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/153879 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)
https://github.com/rampitec created
https://github.com/llvm/llvm-project/pull/153881
Co-authored-by: Stephen Thomas
>From 0c71fc2a1f291f245dabec98199295b3edd392e5 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Fri, 15 Aug 2025 14:18:56 -0700
Subject: [PATCH] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI.
Co-authored-by: Stephen Thomas
---
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index fa3ca27a5f47c..49a681efc79c7 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1749,7 +1749,7 @@ bool
GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(0x0fff);
+ .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));
return true;
}
@@ -1799,7 +1799,7 @@ bool
GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- I.getOperand(0).getImm() == 0x0fff))
+ AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
return HazardExpired;
// Track registers writes
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)
rampitec wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/153880?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#153881** https://app.graphite.dev/github/pr/llvm/llvm-project/153881?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#153880** https://app.graphite.dev/github/pr/llvm/llvm-project/153880?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/153880?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#153879** https://app.graphite.dev/github/pr/llvm/llvm-project/153879?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#153878** https://app.graphite.dev/github/pr/llvm/llvm-project/153878?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/153880 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)
rampitec wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/153881?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#153881** https://app.graphite.dev/github/pr/llvm/llvm-project/153881?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/153881?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#153880** https://app.graphite.dev/github/pr/llvm/llvm-project/153880?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#153879** https://app.graphite.dev/github/pr/llvm/llvm-project/153879?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#153878** https://app.graphite.dev/github/pr/llvm/llvm-project/153878?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/153881 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (PR #153879)
https://github.com/rampitec ready_for_review https://github.com/llvm/llvm-project/pull/153879 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] w/a for s_setreg_b32 gfx1250 hazard with MODE register (PR #153879)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
Changes
---
Full diff: https://github.com/llvm/llvm-project/pull/153879.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+12)
- (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h (+1)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+4)
- (modified) llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir (+54)
``diff
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index dd7c1914d3440..c1cca063aac6f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1206,6 +1206,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixDsAtomicAsyncBarrierArriveB64(MI);
if (ST.hasScratchBaseForwardingHazard())
fixScratchBaseForwardingHazard(MI);
+ if (ST.setRegModeNeedsVNOPs())
+fixSetRegMode(MI);
}
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo
&TRI,
@@ -3546,3 +3548,13 @@ bool
GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) {
AMDGPU::DepCtr::encodeFieldSaSdst(0), 0));
return true;
}
+
+bool GCNHazardRecognizer::fixSetRegMode(MachineInstr *MI) {
+ if (!isSSetReg(MI->getOpcode()) ||
+ MI->getOperand(1).getImm() != AMDGPU::Hwreg::ID_MODE)
+return false;
+
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index e0982b46424b9..67beffadc0913 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -113,6 +113,7 @@ class GCNHazardRecognizer final : public
ScheduleHazardRecognizer {
bool fixGetRegWaitIdle(MachineInstr *MI);
bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
bool fixScratchBaseForwardingHazard(MachineInstr *MI);
+ bool fixSetRegMode(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 404a476a3076a..2a8385df3f934 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1345,6 +1345,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasVALUReadSGPRHazard() const { return GFX12Insts && !GFX1250Insts; }
+ bool setRegModeNeedsVNOPs() const {
+return GFX1250Insts && getGeneration() == GFX12;
+ }
+
/// Return if operations acting on VGPR tuples require even alignment.
bool needsAlignedVGPRs() const { return GFX90AInsts || GFX1250Insts; }
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
index f4596b0832d97..170478539d8a9 100644
--- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -493,3 +493,57 @@ body: |
liveins: $vgpr0
$vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
...
+
+---
+name: s_setreg_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+liveins: $sgpr0
+; GCN-LABEL: name: s_setreg_b32_hwreg_mode
+; GCN: liveins: $sgpr0
+; GCN-NEXT: {{ $}}
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_b32_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+liveins: $sgpr0
+; GCN-LABEL: name: s_setreg_b32_mode
+; GCN: liveins: $sgpr0
+; GCN-NEXT: {{ $}}
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_hwreg_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+; GCN-LABEL: name: s_setreg_imm32_b32_hwreg_mode
+; GCN: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+S_SETREG_IMM32_B32 1, 1, implicit-def $mode, implicit $mode
+...
+
+---
+name: s_setreg_imm32_b32_mode
+tracksRegLiveness: true
+body: |
+ bb.0:
+; GCN-LABEL: name: s_setreg_imm32_b32_mode
+; GCN: V_NOP_e32 implicit $exec
+; GCN-NEXT: V_NOP_e32 implicit $exec
+; GCN-NEXT: S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit
$mode
+S_SETREG_IMM32_B32_mode 1, 1, implicit-def $mode, implicit $mode
+...
``
https://github.com/llvm/llvm-project/pull/153879
___
llvm-branch-commits mailing list
[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)
https://github.com/rampitec ready_for_review https://github.com/llvm/llvm-project/pull/153880 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Update GCNHazardRecognizer's understanding of gfx12 waitcount instructions (PR #153880)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) Changes This simply updates the pass's cognizance of these instructions, and for the most part the hazards where they might be encountered do not exist for gfx12. Nonetheless, encountering them has to be checked for as doing so would indicate a compiler error. Co-authored-by: Stephen Thomas--- Full diff: https://github.com/llvm/llvm-project/pull/153880.diff 1 Files Affected: - (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+19) ``diff diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index c1cca063aac6f..fa3ca27a5f47c 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1357,6 +1357,16 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) { // DsCnt corresponds to LGKMCnt here. return (Decoded.DsCnt == 0); } + case AMDGPU::S_WAIT_STORECNT: + case AMDGPU::S_WAIT_STORECNT_DSCNT: + case AMDGPU::S_WAIT_LOADCNT: + case AMDGPU::S_WAIT_LOADCNT_DSCNT: + case AMDGPU::S_WAIT_SAMPLECNT: + case AMDGPU::S_WAIT_BVHCNT: + case AMDGPU::S_WAIT_DSCNT: + case AMDGPU::S_WAIT_EXPCNT: + case AMDGPU::S_WAIT_KMCNT: +llvm_unreachable("unexpected wait count instruction"); default: // SOPP instructions cannot mitigate the hazard. 
if (TII->isSOPP(MI)) @@ -2254,6 +2264,15 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) { case AMDGPU::S_WAITCNT_EXPCNT: case AMDGPU::S_WAITCNT_LGKMCNT: case AMDGPU::S_WAIT_IDLE: +case AMDGPU::S_WAIT_LOADCNT: +case AMDGPU::S_WAIT_LOADCNT_DSCNT: +case AMDGPU::S_WAIT_SAMPLECNT: +case AMDGPU::S_WAIT_BVHCNT: +case AMDGPU::S_WAIT_STORECNT: +case AMDGPU::S_WAIT_STORECNT_DSCNT: +case AMDGPU::S_WAIT_EXPCNT: +case AMDGPU::S_WAIT_DSCNT: +case AMDGPU::S_WAIT_KMCNT: return true; default: break; `` https://github.com/llvm/llvm-project/pull/153880 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)
https://github.com/rampitec ready_for_review https://github.com/llvm/llvm-project/pull/153881 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Use encodeFieldVaVdst in hazard recognizer. NFCI. (PR #153881)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) Changes Co-authored-by: Stephen Thomas--- Full diff: https://github.com/llvm/llvm-project/pull/153881.diff 1 Files Affected: - (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+2-2) ``diff diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index fa3ca27a5f47c..49a681efc79c7 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1749,7 +1749,7 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) { BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(0x0fff); + .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0)); return true; } @@ -1799,7 +1799,7 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) { if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) || (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - I.getOperand(0).getImm() == 0x0fff)) + AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0)) return HazardExpired; // Track registers writes `` https://github.com/llvm/llvm-project/pull/153881 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)
https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/153873 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)
https://github.com/pcc updated https://github.com/llvm/llvm-project/pull/153873 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Github] Remove call to llvm-project-tests.yml from mlir-spirv-tests.yml (PR #153871)
https://github.com/boomanaiden154 created https://github.com/llvm/llvm-project/pull/153871 This will eventually allow for removing llvm-project-tests.yml. This should significantly reduce the complexity of this workflow (including the complexity of llvm-project-tests.yml) at the cost of a little bit of duplication. ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Github] Remove call to llvm-project-tests.yml from mlir-spirv-tests.yml (PR #153871)
llvmbot wrote: @llvm/pr-subscribers-github-workflow Author: Aiden Grossman (boomanaiden154) Changes This will eventually allow for removing llvm-project-tests.yml. This should significantly reduce the complexity of this workflow (including the complexity of llvm-project-tests.yml) at the cost of a little bit of duplication. --- Full diff: https://github.com/llvm/llvm-project/pull/153871.diff 1 Files Affected: - (modified) .github/workflows/mlir-spirv-tests.yml (+25-6) ``diff diff --git a/.github/workflows/mlir-spirv-tests.yml b/.github/workflows/mlir-spirv-tests.yml index 48b6c69a61f50..658858feb8814 100644 --- a/.github/workflows/mlir-spirv-tests.yml +++ b/.github/workflows/mlir-spirv-tests.yml @@ -24,9 +24,28 @@ jobs: check_spirv: if: github.repository_owner == 'llvm' name: Test MLIR SPIR-V -uses: ./.github/workflows/llvm-project-tests.yml -with: - build_target: check-mlir - projects: mlir - extra_cmake_args: '-DLLVM_TARGETS_TO_BUILD="host" -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON' - os_list: '["ubuntu-24.04"]' +runs-on: ubuntu-24.04 +container: + image: ghcr.io/llvm/ci-ubuntu-24.04:latest +steps: + - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 + - name: Setup ccache +uses: hendrikmuhs/ccache-action@a1209f81afb8c005c13b4296c32e363431bffea5 # v1.2.17 +with: + max-size: 2G + key: spirv-mlir-ubuntu-24.04 + variant: sccache + - name: Build and Test +run: | + mkdir build + cmake -GNinja \ +-S llvm \ +-B build \ +-DCMAKE_BUILD_TYPE=Release \ +-DLLVM_ENABLE_ASSERTIONS=ON \ +-DCMAKE_C_COMPILER_LAUNCHER=sccache \ +-DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ +-DLLVM_TARGETS_TO_BUILD="host" \ +-DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON \ +-DLLVM_TARGETS_TO_BUILD=mlir + ninja -C build check-mlir `` https://github.com/llvm/llvm-project/pull/153871 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Github] Remove call to llvm-project-tests.yml from mlir-spirv-tests.yml (PR #153871)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/153871 >From a61dece065902c60c9ea0f80ed133c4ad92c549f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 20:31:54 + Subject: [PATCH] fix Created using spr 1.3.6 --- .github/workflows/mlir-spirv-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mlir-spirv-tests.yml b/.github/workflows/mlir-spirv-tests.yml index 658858feb8814..dfd3374dcc799 100644 --- a/.github/workflows/mlir-spirv-tests.yml +++ b/.github/workflows/mlir-spirv-tests.yml @@ -47,5 +47,5 @@ jobs: -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ -DLLVM_TARGETS_TO_BUILD="host" \ -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON \ --DLLVM_TARGETS_TO_BUILD=mlir +-LLVM_ENABLE_PROJECTS=mlir ninja -C build check-mlir ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Github] Remove call to llvm-project-tests.yml from mlir-spirv-tests.yml (PR #153871)
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/153871 >From a61dece065902c60c9ea0f80ed133c4ad92c549f Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 20:31:54 + Subject: [PATCH 1/2] fix Created using spr 1.3.6 --- .github/workflows/mlir-spirv-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mlir-spirv-tests.yml b/.github/workflows/mlir-spirv-tests.yml index 658858feb8814..dfd3374dcc799 100644 --- a/.github/workflows/mlir-spirv-tests.yml +++ b/.github/workflows/mlir-spirv-tests.yml @@ -47,5 +47,5 @@ jobs: -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ -DLLVM_TARGETS_TO_BUILD="host" \ -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON \ --DLLVM_TARGETS_TO_BUILD=mlir +-LLVM_ENABLE_PROJECTS=mlir ninja -C build check-mlir >From 41b41b8785f61974fa132f31e9c6a8317d5575ee Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 15 Aug 2025 20:34:19 + Subject: [PATCH 2/2] fix Created using spr 1.3.6 --- .github/workflows/mlir-spirv-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mlir-spirv-tests.yml b/.github/workflows/mlir-spirv-tests.yml index dfd3374dcc799..78952ccad2642 100644 --- a/.github/workflows/mlir-spirv-tests.yml +++ b/.github/workflows/mlir-spirv-tests.yml @@ -47,5 +47,5 @@ jobs: -DCMAKE_CXX_COMPILER_LAUNCHER=sccache \ -DLLVM_TARGETS_TO_BUILD="host" \ -DLLVM_INCLUDE_SPIRV_TOOLS_TESTS=ON \ --LLVM_ENABLE_PROJECTS=mlir +-DLLVM_ENABLE_PROJECTS=mlir ninja -C build check-mlir ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: Peter Collingbourne (pcc) Changes With gcc 15 we end up emitting a reference to the std::__glibcxx_assert_fail function because of this change: https://github.com/gcc-mirror/gcc/commit/361d230fd7800a7e749aba8ed020f54f5c26d504 combined with assertion checks in the std::atomic implementation. This reference is undefined with dfsan causing the test to fail. Fix it by defining the macro that disables assertions. --- Full diff: https://github.com/llvm/llvm-project/pull/153873.diff 1 Files Affected: - (modified) compiler-rt/test/dfsan/atomic.cpp (+5-2) ``diff diff --git a/compiler-rt/test/dfsan/atomic.cpp b/compiler-rt/test/dfsan/atomic.cpp index 22ee323c752f8..73e1cbd17a7cd 100644 --- a/compiler-rt/test/dfsan/atomic.cpp +++ b/compiler-rt/test/dfsan/atomic.cpp @@ -1,9 +1,12 @@ -// RUN: %clangxx_dfsan %s -fno-exceptions -o %t && %run %t -// RUN: %clangxx_dfsan -DORIGIN_TRACKING -mllvm -dfsan-track-origins=1 %s -fno-exceptions -o %t && %run %t +// RUN: %clangxx_dfsan %s -fno-exceptions -D_GLIBCXX_NO_ASSERTIONS -o %t && %run %t +// RUN: %clangxx_dfsan -DORIGIN_TRACKING -mllvm -dfsan-track-origins=1 %s -fno-exceptions -D_GLIBCXX_NO_ASSERTIONS -o %t && %run %t // // Use -fno-exceptions to turn off exceptions to avoid instrumenting // __cxa_begin_catch, std::terminate and __gxx_personality_v0. // +// Use -D_GLIBCXX_NO_ASSERTIONS to avoid depending on +// std::__glibcxx_assert_fail with gcc >= 15. +// // TODO: Support builtin atomics. For example, https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html // DFSan instrumentation pass cannot identify builtin callsites yet. `` https://github.com/llvm/llvm-project/pull/153873 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)
https://github.com/pcc created https://github.com/llvm/llvm-project/pull/153873 With gcc 15 we end up emitting a reference to the std::__glibcxx_assert_fail function because of this change: https://github.com/gcc-mirror/gcc/commit/361d230fd7800a7e749aba8ed020f54f5c26d504 combined with assertion checks in the std::atomic implementation. This reference is undefined with dfsan causing the test to fail. Fix it by defining the macro that disables assertions. ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [mlir] [OpenMP][OMPIRBuilder] Use device shared memory for arg structures (PR #150925)
skatrak wrote: I just pushed a commit replacing callbacks with `CodeExtractor` subclasses. I also moved some of the logic to create a `CodeExtractor` from an `OutlineInfo` so that hopefully using these custom `CodeExtractor`s doesn't turn out to be too much of a hassle. Let me know if this works for you @Meinersbur, @bhandarkar-pranav. Next week I'll try to work on obtaining and using a proper deallocation block, so that we don't have to override it and can get everything in better shape for merging. That'll be an additional PR added to the stack. https://github.com/llvm/llvm-project/pull/150925 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Add `split-section` to `llvm-objcopy` and implement it for `DXContainer` (PR #153265)
inbelic wrote: > Rather than a new option, I'd rather you just add support for > `--only-section` and `--dump-section`, which can then be specified in > conjunction with each other to achieve the same result. IIRC, a while back, > `--split-dwo` was considered a mistake, because the same effect could be > achieved by specifying `--extract-dwo` and `--strip-dwo` and `--split-dwo` > doesn't exist in GNU objcopy (which we aim for broad compatibility with). I assume you mean `--remove-section` and `--dump-section`. The issue for our use-case is that `--dump-section` only outputs the section contents, it does not output a valid object with just one section specified. It could be that such an option isn't really applicable to other object formats, and we could go about this as a `DXContainer` specific option. We thought it best not to have a different implementation of `dump-section` for `DXContainer` that outputs more than just the contents. https://github.com/llvm/llvm-project/pull/153265 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)
https://github.com/fmayer approved this pull request. https://github.com/llvm/llvm-project/pull/153873 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Github] Remove call to llvm-project-tests from libclang tests (PR #153876)
https://github.com/boomanaiden154 created https://github.com/llvm/llvm-project/pull/153876 This allows for removing llvm-project-tests.yml. This significantly reduces the complexity of this workflow (including the complexity of llvm-project-tests.yml) at the cost of a little bit of duplication with the other workflows that were also using llvm-project-tests.yml. ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)
pcc wrote: Landed manually https://github.com/llvm/llvm-project/pull/153873 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] dfsan: Fix test with gcc 15. (PR #153873)
https://github.com/pcc closed https://github.com/llvm/llvm-project/pull/153873 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] 3c37d74 - Revert "[flang] Lower EOSHIFT into hlfir.eoshift. (#153106)"
Author: Slava Zakharin
Date: 2025-08-15T17:38:13-07:00
New Revision: 3c37d74984d24b503c05e338f95c5cb1c2bcb57e
URL:
https://github.com/llvm/llvm-project/commit/3c37d74984d24b503c05e338f95c5cb1c2bcb57e
DIFF:
https://github.com/llvm/llvm-project/commit/3c37d74984d24b503c05e338f95c5cb1c2bcb57e.diff
LOG: Revert "[flang] Lower EOSHIFT into hlfir.eoshift. (#153106)"
This reverts commit 25285b3476292fea239fdab945ca39d156c782d5.
Added:
Modified:
flang/lib/Lower/HlfirIntrinsics.cpp
Removed:
flang/test/Lower/HLFIR/eoshift.f90
diff --git a/flang/lib/Lower/HlfirIntrinsics.cpp
b/flang/lib/Lower/HlfirIntrinsics.cpp
index 3b0f2e35cd5b5..6e1d06a25924b 100644
--- a/flang/lib/Lower/HlfirIntrinsics.cpp
+++ b/flang/lib/Lower/HlfirIntrinsics.cpp
@@ -170,17 +170,6 @@ class HlfirCShiftLowering : public
HlfirTransformationalIntrinsic {
mlir::Type stmtResultType) override;
};
-class HlfirEOShiftLowering : public HlfirTransformationalIntrinsic {
-public:
- using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic;
-
-protected:
- mlir::Value
- lowerImpl(const Fortran::lower::PreparedActualArguments &loweredActuals,
-const fir::IntrinsicArgumentLoweringRules *argLowering,
-mlir::Type stmtResultType) override;
-};
-
class HlfirReshapeLowering : public HlfirTransformationalIntrinsic {
public:
using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic;
@@ -441,33 +430,6 @@ mlir::Value HlfirCShiftLowering::lowerImpl(
return createOp(resultType, operands);
}
-mlir::Value HlfirEOShiftLowering::lowerImpl(
-const Fortran::lower::PreparedActualArguments &loweredActuals,
-const fir::IntrinsicArgumentLoweringRules *argLowering,
-mlir::Type stmtResultType) {
- auto operands = getOperandVector(loweredActuals, argLowering);
- assert(operands.size() == 4);
- mlir::Value array = operands[0];
- mlir::Value shift = operands[1];
- mlir::Value boundary = operands[2];
- mlir::Value dim = operands[3];
- // If DIM is present, then dereference it if it is a ref.
- if (dim)
-dim = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{dim});
-
- mlir::Type resultType = computeResultType(array, stmtResultType);
-
- // Scalar logical constant boundary might be represented using i1, i2, ...
- // type. We need to cast it to fir.logical type of the ARRAY/result.
- if (auto logicalTy = mlir::dyn_cast(
- hlfir::getFortranElementType(resultType)))
-if (boundary && fir::isa_trivial(boundary.getType()) &&
-boundary.getType() != logicalTy)
- boundary = builder.createConvert(loc, logicalTy, boundary);
-
- return createOp(resultType, array, shift, boundary, dim);
-}
-
mlir::Value HlfirReshapeLowering::lowerImpl(
const Fortran::lower::PreparedActualArguments &loweredActuals,
const fir::IntrinsicArgumentLoweringRules *argLowering,
@@ -527,9 +489,6 @@ std::optional
Fortran::lower::lowerHlfirIntrinsic(
if (name == "cshift")
return HlfirCShiftLowering{builder, loc}.lower(loweredActuals, argLowering,
stmtResultType);
- if (name == "eoshift")
-return HlfirEOShiftLowering{builder, loc}.lower(loweredActuals,
argLowering,
-stmtResultType);
if (name == "reshape")
return HlfirReshapeLowering{builder, loc}.lower(loweredActuals,
argLowering,
stmtResultType);
diff --git a/flang/test/Lower/HLFIR/eoshift.f90
b/flang/test/Lower/HLFIR/eoshift.f90
deleted file mode 100644
index 3b2570ab59365..0
--- a/flang/test/Lower/HLFIR/eoshift.f90
+++ /dev/null
@@ -1,259 +0,0 @@
-! Test lowering of EOSHIFT intrinsic to HLFIR
-! RUN: bbc -emit-hlfir -o - -I nowhere %s 2>&1 | FileCheck %s
-
-module eoshift_types
- type t
- end type t
-end module eoshift_types
-
-! 1d shift by scalar
-subroutine eoshift1(a, s)
- integer :: a(:), s
- a = EOSHIFT(a, 2)
-end subroutine
-! CHECK-LABEL: func.func @_QPeoshift1(
-! CHECK-SAME: %[[VAL_0:.*]]:
!fir.box> {fir.bindc_name = "a"},
-! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref
{fir.bindc_name = "s"}) {
-! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
-! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]]
-! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]]
-! CHECK: %[[VAL_5:.*]] = arith.constant 2 : i32
-! CHECK: %[[VAL_6:.*]] = hlfir.eoshift %[[VAL_3]]#0 %[[VAL_5]] :
(!fir.box>, i32) -> !hlfir.expr
-! CHECK: hlfir.assign %[[VAL_6]] to %[[VAL_3]]#0 :
!hlfir.expr, !fir.box>
-! CHECK: hlfir.destroy %[[VAL_6]] : !hlfir.expr
-! CHECK: return
-! CHECK: }
-
-! 1d shift by scalar with dim
-subroutine eoshift2(a, s)
- integer :: a(:), s
- a = EOSHIFT(a, 2, dim=1)
-end subroutine
-! CHECK-LABEL:
[llvm-branch-commits] [llvm] [RISCV] Track Linker Relaxable through Assembly Relaxation (PR #153670)
https://github.com/MaskRay approved this pull request. https://github.com/llvm/llvm-project/pull/153670 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Github] Remove call to llvm-project-tests from libclang tests (PR #153876)
https://github.com/DeinAlptraum edited https://github.com/llvm/llvm-project/pull/153876 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Github] Remove call to llvm-project-tests from libclang tests (PR #153876)
https://github.com/DeinAlptraum commented: Sounds good. A few comments: Please also remove the `.github/workflows/llvm-project-tests.yml` entry under `on.push.paths` & `on.pull_request.paths` in the workflow file. https://github.com/llvm/llvm-project/pull/153876 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Github] Remove call to llvm-project-tests from libclang tests (PR #153876)
@@ -25,17 +25,36 @@ on:
jobs:
check-clang-python:
# Build libclang and then run the libclang Python binding's unit tests.
+# There is an issue running on "windows-2019".
+# See
https://github.com/llvm/llvm-project/issues/76601#issuecomment-1873049082.
name: Build and run Python unit tests
if: github.repository == 'llvm/llvm-project'
+runs-on: ubuntu-24.04
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.13"]
-uses: ./.github/workflows/llvm-project-tests.yml
-with:
- build_target: check-clang-python
- projects: clang
- # There is an issue running on "windows-2019".
- # See
https://github.com/llvm/llvm-project/issues/76601#issuecomment-1873049082.
- os_list: '["ubuntu-24.04"]'
- python_version: ${{ matrix.python-version }}
+steps:
+ - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 #
v5.0.0
+ - name: Setup Python
+uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 #
v5.4.0
+with:
+ python-version: ${{ matrix.python_version }}
DeinAlptraum wrote:
Something doesn't seem to be working here: the action raises several warnings,
and the version used for the tests at the end was the pre-installed 3.12 (see,
e.g., the "Found Python3" line at the start of the "Build and Test" step, or
the Python call towards the end of the step).
A working run for comparison:
https://github.com/llvm/llvm-project/actions/runs/16983718164/job/48198262227?pr=153746
https://github.com/llvm/llvm-project/pull/153876
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/21.x: [clang][LoongArch] Ensure `target("lasx")` implies LSX support (#153542) (PR #153739)
https://github.com/SixWeining approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/153739 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Reduce number of reserved relocations when relax enabled (PR #153769)
llvmbot wrote:
@llvm/pr-subscribers-mc
@llvm/pr-subscribers-backend-loongarch
Author: ZhaoQi (zhaoqi5)
Changes
---
Full diff: https://github.com/llvm/llvm-project/pull/153769.diff
2 Files Affected:
- (modified) llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
(+9-18)
- (modified) llvm/test/MC/LoongArch/Relocations/relax-attr.s (+4-3)
``diff
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index ca5d27d54bb81..c6eda4fd63a6b 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -143,8 +143,6 @@ static void fixupLeb128(MCContext &Ctx, const MCFixup
&Fixup, uint8_t *Data,
void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
const MCValue &Target, uint8_t *Data,
uint64_t Value, bool IsResolved) {
- if (IsResolved && shouldForceRelocation(Fixup, Target))
-IsResolved = false;
IsResolved = addReloc(F, Fixup, Target, Value, IsResolved);
if (!Value)
return; // Doesn't change encoding.
@@ -176,20 +174,6 @@ void LoongArchAsmBackend::applyFixup(const MCFragment &F,
const MCFixup &Fixup,
}
}
-bool LoongArchAsmBackend::shouldForceRelocation(const MCFixup &Fixup,
-const MCValue &Target) {
- switch (Fixup.getKind()) {
- default:
-return STI.hasFeature(LoongArch::FeatureRelax);
- case FK_Data_1:
- case FK_Data_2:
- case FK_Data_4:
- case FK_Data_8:
- case FK_Data_leb128:
-return !Target.isAbsolute();
- }
-}
-
static inline std::pair
getRelocPairForSize(unsigned Size) {
switch (Size) {
@@ -484,9 +468,16 @@ bool LoongArchAsmBackend::addReloc(const MCFragment &F,
const MCFixup &Fixup,
return false;
}
- IsResolved = Fallback();
// If linker relaxation is enabled and supported by the current relocation,
- // append a RELAX relocation.
+ // generate a relocation and then append a RELAX.
+ if (Fixup.isLinkerRelaxable())
+IsResolved = false;
+ if (IsResolved && Fixup.isPCRel())
+IsResolved = isPCRelFixupResolved(Target.getAddSym(), F);
+
+ if (!IsResolved)
+Asm->getWriter().recordRelocation(F, Fixup, Target, FixedValue);
+
if (Fixup.isLinkerRelaxable()) {
auto FA = MCFixup::create(Fixup.getOffset(), nullptr, ELF::R_LARCH_RELAX);
Asm->getWriter().recordRelocation(F, FA, MCValue::get(nullptr),
diff --git a/llvm/test/MC/LoongArch/Relocations/relax-attr.s
b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
index e3e9038b755d3..d94d32ebd7ab0 100644
--- a/llvm/test/MC/LoongArch/Relocations/relax-attr.s
+++ b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
@@ -17,11 +17,8 @@
# CHECKR-NEXT: 0x4 R_LARCH_CALL36 foo 0x0
# CHECKR-NEXT: 0x4 R_LARCH_RELAX - 0x0
# CHECKR-NEXT: 0x10 R_LARCH_B21 .L0 0x0
-# CHECKR-NEXT: 0x14 R_LARCH_B21 .L1 0x0
# CHECKR-NEXT: 0x18 R_LARCH_B16 .L0 0x0
-# CHECKR-NEXT: 0x1C R_LARCH_B16 .L1 0x0
# CHECKR-NEXT: 0x20 R_LARCH_B26 .L0 0x0
-# CHECKR-NEXT: 0x24 R_LARCH_B26 .L1 0x0
# CHECKR-NEXT: }
# CHECKR-NEXT: Section ({{.*}}) .rela.data {
# CHECKR-NEXT: 0x0 R_LARCH_64 .L1 0x0
@@ -36,6 +33,10 @@
.L1:
nop
+## Relocations for branches to .L0 must be reserved and be fixed up by linker
+## when linker relaxation enabled, because of the relaxable call36 instruction.
+## Branches to .L1 can be resolved correctly at compile time, so their
+## relocations can simply be removed.
bnez $a0, .L0
beqz $a0, .L1
beq $a0, $a1, .L0
``
https://github.com/llvm/llvm-project/pull/153769
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Reduce number of reserved relocations when relax enabled (PR #153769)
https://github.com/zhaoqi5 created
https://github.com/llvm/llvm-project/pull/153769
None
>From f67324528d93ca3e908f39e8e89caef5ecc3e11f Mon Sep 17 00:00:00 2001
From: Qi Zhao
Date: Fri, 15 Aug 2025 17:12:33 +0800
Subject: [PATCH] [LoongArch] Reduce number of reserved relocations when relax
enabled
---
.../MCTargetDesc/LoongArchAsmBackend.cpp | 27 +++
.../MC/LoongArch/Relocations/relax-attr.s | 7 ++---
2 files changed, 13 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index ca5d27d54bb81..c6eda4fd63a6b 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -143,8 +143,6 @@ static void fixupLeb128(MCContext &Ctx, const MCFixup
&Fixup, uint8_t *Data,
void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
const MCValue &Target, uint8_t *Data,
uint64_t Value, bool IsResolved) {
- if (IsResolved && shouldForceRelocation(Fixup, Target))
-IsResolved = false;
IsResolved = addReloc(F, Fixup, Target, Value, IsResolved);
if (!Value)
return; // Doesn't change encoding.
@@ -176,20 +174,6 @@ void LoongArchAsmBackend::applyFixup(const MCFragment &F,
const MCFixup &Fixup,
}
}
-bool LoongArchAsmBackend::shouldForceRelocation(const MCFixup &Fixup,
-const MCValue &Target) {
- switch (Fixup.getKind()) {
- default:
-return STI.hasFeature(LoongArch::FeatureRelax);
- case FK_Data_1:
- case FK_Data_2:
- case FK_Data_4:
- case FK_Data_8:
- case FK_Data_leb128:
-return !Target.isAbsolute();
- }
-}
-
static inline std::pair
getRelocPairForSize(unsigned Size) {
switch (Size) {
@@ -484,9 +468,16 @@ bool LoongArchAsmBackend::addReloc(const MCFragment &F,
const MCFixup &Fixup,
return false;
}
- IsResolved = Fallback();
// If linker relaxation is enabled and supported by the current relocation,
- // append a RELAX relocation.
+ // generate a relocation and then append a RELAX.
+ if (Fixup.isLinkerRelaxable())
+IsResolved = false;
+ if (IsResolved && Fixup.isPCRel())
+IsResolved = isPCRelFixupResolved(Target.getAddSym(), F);
+
+ if (!IsResolved)
+Asm->getWriter().recordRelocation(F, Fixup, Target, FixedValue);
+
if (Fixup.isLinkerRelaxable()) {
auto FA = MCFixup::create(Fixup.getOffset(), nullptr, ELF::R_LARCH_RELAX);
Asm->getWriter().recordRelocation(F, FA, MCValue::get(nullptr),
diff --git a/llvm/test/MC/LoongArch/Relocations/relax-attr.s
b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
index e3e9038b755d3..d94d32ebd7ab0 100644
--- a/llvm/test/MC/LoongArch/Relocations/relax-attr.s
+++ b/llvm/test/MC/LoongArch/Relocations/relax-attr.s
@@ -17,11 +17,8 @@
# CHECKR-NEXT: 0x4 R_LARCH_CALL36 foo 0x0
# CHECKR-NEXT: 0x4 R_LARCH_RELAX - 0x0
# CHECKR-NEXT: 0x10 R_LARCH_B21 .L0 0x0
-# CHECKR-NEXT: 0x14 R_LARCH_B21 .L1 0x0
# CHECKR-NEXT: 0x18 R_LARCH_B16 .L0 0x0
-# CHECKR-NEXT: 0x1C R_LARCH_B16 .L1 0x0
# CHECKR-NEXT: 0x20 R_LARCH_B26 .L0 0x0
-# CHECKR-NEXT: 0x24 R_LARCH_B26 .L1 0x0
# CHECKR-NEXT: }
# CHECKR-NEXT: Section ({{.*}}) .rela.data {
# CHECKR-NEXT: 0x0 R_LARCH_64 .L1 0x0
@@ -36,6 +33,10 @@
.L1:
nop
+## Relocations for branches to .L0 must be reserved and fixed up by the linker
+## when linker relaxation is enabled, because of the relaxable call36 instruction.
+## Branches to .L1 can be resolved correctly at compile time, so their
+## relocations can simply be removed.
bnez $a0, .L0
beqz $a0, .L1
beq $a0, $a1, .L0
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64][ISel] Select constructive EXT_ZZI pseudo instruction (PR #152554)
https://github.com/paulwalker-arm approved this pull request. https://github.com/llvm/llvm-project/pull/152554 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Add `split-section` to `llvm-objcopy` and implement it for `DXContainer` (PR #153265)
jh7370 wrote: > @inbelic can confirm, but I thought the intent was for this to work like > `--split-dwo` where it also removes the section from the original object. > > We have a few common tooling cases for DX files where the compiler generates > an object with a bunch of sections that you may strip out later (debug info, > runtime reflection, runtime signatures). What we'd like to have is a tooling > path where we can do that with a single invocation of objcopy something like: > > ``` > llvm-objcopy --split-section=RTS0= > --split-section=STAT= --split-section=ILDB= new debug info> > ``` > > I suggested making this a general option since it could be implemented for > any object file type, but if you think it is better to do it as a DX-specific > option we could do that as well. Rather than a new option, I'd rather you just add support for `--only-section` and `--dump-section`, which can then be specified in conjunction with each other to achieve the same result. IIRC, a while back, `--split-dwo` was considered a mistake, because the same effect could be achieved by specifying `--extract-dwo` and `--strip-dwo` and `--split-dwo` doesn't exist in GNU objcopy (which we aim for broad compatibility with). https://github.com/llvm/llvm-project/pull/153265 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Add `split-section` to `llvm-objcopy` and implement it for `DXContainer` (PR #153265)
https://github.com/jh7370 requested changes to this pull request. Requesting changes to prevent this going in without my review etc. https://github.com/llvm/llvm-project/pull/153265 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Add support for `remove-section` of `DXContainer` for `llvm-objcopy` (PR #153246)
@@ -12,18 +12,18 @@ Header: Version: Major: 1 Minor: 0 -## FileSize = 1996 - 8 (FKE0 content) - 1688 (FKE4 content) +## FileSize = 1996 - 8 (FKE1 content) - 1688 (FKE4 content) ## - 8 (2 part offsets) - 16 (2 part headers) ## = 276 -## CHECK: FileSize: 276 +# CHECK: FileSize: 276 FileSize:1996 -## CHECK: PartCount: 5 +# CHECK: PartCount: 5 jh7370 wrote: Can any of these be `CHECK-NEXT:`? https://github.com/llvm/llvm-project/pull/153246 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Add support for `remove-section` of `DXContainer` for `llvm-objcopy` (PR #153246)
@@ -25,13 +25,13 @@ Header: Minor: 0 ## FileSize = 1984 - 24 (RTS0 content) - 4 (1 part offset) - 8 (1 part header) ## = 1948 -## CHECK: FileSize: 1948 +# CHECK: FileSize: 1948 FileSize:1984 -## CHECK: PartCount: 6 +# CHECK: PartCount: 6 PartCount: 7 PartOffsets: [ 60, 1792, 1808, 1836, 1852, 1868, 1900 ] Parts: -## CHECK-NOT: RTS0 +# CHECK-NOT: RTS0 jh7370 wrote: Same comment as above re checking for "Parts:" https://github.com/llvm/llvm-project/pull/153246 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Add support for `remove-section` of `DXContainer` for `llvm-objcopy` (PR #153246)
@@ -12,18 +12,18 @@ Header: Version: Major: 1 Minor: 0 -## FileSize = 1996 - 8 (FKE0 content) - 1688 (FKE4 content) +## FileSize = 1996 - 8 (FKE1 content) - 1688 (FKE4 content) ## - 8 (2 part offsets) - 16 (2 part headers) ## = 276 -## CHECK: FileSize: 276 +# CHECK: FileSize: 276 FileSize:1996 -## CHECK: PartCount: 5 +# CHECK: PartCount: 5 PartCount: 7 -## CHECK: PartOffsets:[ 52, 68, 84, 212, 240 ] +# CHECK: PartOffsets:[ 52, 68, 84, 212, 240 ] PartOffsets: [ 60, 76, 92, 108, 236, 1932, 1960 ] Parts: -## CHECK-NOT: FKE1 -## CHECK-NOT: FKE4 +# CHECK-NOT: FKE1 +# CHECK-NOT: FKE4 jh7370 wrote: Are you aware that this just means FKE1 and FKE4 can't appear from this point onwards? I'd be tempted to put a `# CHECK: Parts:` line before them, since technically the obj2yaml output order doesn't have to be fixed and this will help remove the risk of things rotting. https://github.com/llvm/llvm-project/pull/153246 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lldb] 82f2353 - Revert "[lldb] Fallback to expression eval when Dump of variable fails in dwi…"
Author: Dave Lee
Date: 2025-08-15T08:46:32-07:00
New Revision: 82f23539351c8365281f6b68a42564c4ec25548e
URL:
https://github.com/llvm/llvm-project/commit/82f23539351c8365281f6b68a42564c4ec25548e
DIFF:
https://github.com/llvm/llvm-project/commit/82f23539351c8365281f6b68a42564c4ec25548e.diff
LOG: Revert "[lldb] Fallback to expression eval when Dump of variable fails in
dwi…"
This reverts commit f23c10f9e68efae7df10745234bf879a84b2d02b.
Added:
Modified:
lldb/source/Commands/CommandObjectDWIMPrint.cpp
Removed:
diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp
b/lldb/source/Commands/CommandObjectDWIMPrint.cpp
index 5e864a4cc52c2..0d9eb45732161 100644
--- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp
+++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp
@@ -18,14 +18,11 @@
#include "lldb/Interpreter/OptionGroupValueObjectDisplay.h"
#include "lldb/Target/StackFrame.h"
#include "lldb/Utility/ConstString.h"
-#include "lldb/Utility/LLDBLog.h"
-#include "lldb/Utility/Log.h"
#include "lldb/ValueObject/ValueObject.h"
#include "lldb/lldb-defines.h"
#include "lldb/lldb-enumerations.h"
#include "lldb/lldb-forward.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
#include
@@ -135,22 +132,27 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command,
};
// Dump `valobj` according to whether `po` was requested or not.
- auto dump_val_object = [&](ValueObject &valobj) -> Error {
+ auto dump_val_object = [&](ValueObject &valobj) {
if (is_po) {
StreamString temp_result_stream;
- if (Error err = valobj.Dump(temp_result_stream, dump_options))
-return err;
+ if (llvm::Error error = valobj.Dump(temp_result_stream, dump_options)) {
+result.AppendError(toString(std::move(error)));
+return;
+ }
llvm::StringRef output = temp_result_stream.GetString();
maybe_add_hint(output);
result.GetOutputStream() << output;
} else {
- if (Error err = valobj.Dump(result.GetOutputStream(), dump_options))
-return err;
+ llvm::Error error =
+valobj.Dump(result.GetOutputStream(), dump_options);
+ if (error) {
+result.AppendError(toString(std::move(error)));
+return;
+ }
}
m_interpreter.PrintWarningsIfNecessary(result.GetOutputStream(),
m_cmd_name);
result.SetStatus(eReturnStatusSuccessFinishResult);
-return Error::success();
};
// First, try `expr` as a _limited_ frame variable expression path: only the
@@ -184,13 +186,8 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command,
expr);
}
- Error err = dump_val_object(*valobj_sp);
- if (!err)
-return;
-
- // Dump failed, continue on to expression evaluation.
- LLDB_LOG_ERROR(GetLog(LLDBLog::Expressions), std::move(err),
- "could not print frame variable '{1}': {0}", expr);
+ dump_val_object(*valobj_sp);
+ return;
}
}
@@ -199,14 +196,8 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command,
if (auto *state = target.GetPersistentExpressionStateForLanguage(language))
if (auto var_sp = state->GetVariable(expr))
if (auto valobj_sp = var_sp->GetValueObject()) {
- Error err = dump_val_object(*valobj_sp);
- if (!err)
-return;
-
- // Dump failed, continue on to expression evaluation.
- LLDB_LOG_ERROR(GetLog(LLDBLog::Expressions), std::move(err),
- "could not print persistent variable '{1}': {0}",
- expr);
+ dump_val_object(*valobj_sp);
+ return;
}
// Third, and lastly, try `expr` as a source expression to evaluate.
@@ -257,12 +248,10 @@ void CommandObjectDWIMPrint::DoExecute(StringRef command,
result.AppendNoteWithFormatv("ran `expression {0}{1}`", flags, expr);
}
-if (valobj_sp->GetError().GetError() != UserExpression::kNoResult) {
- if (Error err = dump_val_object(*valobj_sp))
-result.SetError(std::move(err));
-} else {
+if (valobj_sp->GetError().GetError() != UserExpression::kNoResult)
+ dump_val_object(*valobj_sp);
+else
result.SetStatus(eReturnStatusSuccessFinishNoResult);
-}
if (suppress_result)
if (auto result_var_sp =
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][PAC][ObjC] Merge the block metadata support for the arm64e abi to llvm 21 (PR #153725)
ojhunt wrote: > Not opposed but this is a pretty significant amount of change for being this > late in the rc cycles, and the changes haven't been upstream for very long. > How risky are these changes? This is the upstreaming of code we've had deployed for a few years at this point, the only changes are style improvements and the brief period where we thought we could drop `LangOpts.PointerAuthBlockDescriptorPointers` from the upstreaming, before remembering that that's only ABI on a subset of our user-space environments. So I do recognize it is a much bigger change than would be ideal at this point, but it is self contained, and only actually applies to one family of platforms, which have significant amounts of deployment experience. e.g even if there is an error in it, it only impacts us, and not having it is even worse as instead of incorrect edge cases (because if there were non-edge case problems we have a number of tests that would presumably detect it) it would be a complete ABI mismatch. https://github.com/llvm/llvm-project/pull/153725 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Parse GROUPPRIVATE directive (PR #153807)
https://github.com/kparzysz updated
https://github.com/llvm/llvm-project/pull/153807
>From ccc414db30f65308d47d2efbb3198a896bd5a67e Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Fri, 15 Aug 2025 08:12:45 -0500
Subject: [PATCH 1/3] [flang][OpenMP] Parse GROUPPRIVATE directive
No semantic checks or lowering yet.
---
flang/include/flang/Parser/dump-parse-tree.h | 1 +
flang/include/flang/Parser/parse-tree.h | 14 +++--
flang/lib/Lower/OpenMP/OpenMP.cpp | 7 +
flang/lib/Parser/openmp-parsers.cpp | 8 +
flang/lib/Parser/unparse.cpp | 7 +
flang/lib/Semantics/check-omp-structure.cpp | 13
flang/lib/Semantics/check-omp-structure.h | 2 ++
flang/test/Lower/OpenMP/Todo/groupprivate.f90 | 9 ++
flang/test/Parser/OpenMP/groupprivate.f90 | 30 +++
9 files changed, 89 insertions(+), 2 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/Todo/groupprivate.f90
create mode 100644 flang/test/Parser/OpenMP/groupprivate.f90
diff --git a/flang/include/flang/Parser/dump-parse-tree.h
b/flang/include/flang/Parser/dump-parse-tree.h
index 2c666a6d09a7b..8fbc6ccc639bf 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -729,6 +729,7 @@ class ParseTreeDumper {
NODE(parser, OpenMPLoopConstruct)
NODE(parser, OpenMPExecutableAllocate)
NODE(parser, OpenMPAllocatorsConstruct)
+ NODE(parser, OpenMPGroupprivate)
NODE(parser, OpenMPRequiresConstruct)
NODE(parser, OpenMPSimpleStandaloneConstruct)
NODE(parser, OpenMPStandaloneConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h
b/flang/include/flang/Parser/parse-tree.h
index e72190f019dd1..ae0259fe9025e 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4943,6 +4943,15 @@ struct OpenMPDeclareSimdConstruct {
std::tuple, OmpClauseList> t;
};
+// ref: [6.0:301-303]
+//
+// groupprivate-directive ->
+//GROUPPRIVATE (variable-list-item...) // since 6.0
+struct OpenMPGroupprivate {
+ WRAPPER_CLASS_BOILERPLATE(OpenMPGroupprivate, OmpDirectiveSpecification);
+ CharBlock source;
+};
+
// 2.4 requires -> REQUIRES requires-clause[ [ [,] requires-clause]...]
struct OpenMPRequiresConstruct {
TUPLE_CLASS_BOILERPLATE(OpenMPRequiresConstruct);
@@ -4970,8 +4979,9 @@ struct OpenMPDeclarativeConstruct {
std::variant
+ OmpDeclareVariantDirective, OpenMPGroupprivate, OpenMPThreadprivate,
+ OpenMPRequiresConstruct, OpenMPUtilityConstruct,
+ OmpMetadirectiveDirective>
u;
};
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fef64ccc15015..ec2ec37e623f8 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -3593,6 +3593,13 @@ genOMP(lower::AbstractConverter &converter,
lower::SymMap &symTable,
}
}
+static void genOMP(lower::AbstractConverter &converter, lower::SymMap
&symTable,
+ semantics::SemanticsContext &semaCtx,
+ lower::pft::Evaluation &eval,
+ const parser::OpenMPGroupprivate &directive) {
+ TODO(converter.getCurrentLocation(), "GROUPPRIVATE");
+}
+
static void genOMP(lower::AbstractConverter &converter, lower::SymMap
&symTable,
semantics::SemanticsContext &semaCtx,
lower::pft::Evaluation &eval,
diff --git a/flang/lib/Parser/openmp-parsers.cpp
b/flang/lib/Parser/openmp-parsers.cpp
index 46b14861096f1..41c16212f5771 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -1773,6 +1773,12 @@
TYPE_PARSER(sourced(construct(
verbatim("DECLARE SIMD"_tok) || verbatim("DECLARE_SIMD"_tok),
maybe(parenthesized(name)), Parser{})))
+TYPE_PARSER(sourced( //
+construct(
+predicated(OmpDirectiveNameParser{},
+IsDirective(llvm::omp::Directive::OMPD_groupprivate)) >=
+Parser{})))
+
// 2.4 Requires construct
TYPE_PARSER(sourced(construct(
verbatim("REQUIRES"_tok), Parser{})))
@@ -1808,6 +1814,8 @@ TYPE_PARSER(
Parser{}) ||
construct(
Parser{}) ||
+construct(
+Parser{}) ||
construct(
Parser{}) ||
construct(
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 4f8d498972807..4294a6d491648 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2716,6 +2716,13 @@ class UnparseVisitor {
void Unparse(const OpenMPDispatchConstruct &x) { //
Unparse(static_cast(x));
}
+ void Unparse(const OpenMPGroupprivate &x) {
+BeginOpenMP();
+Word("!$OMP ");
+Walk(x.v);
+Put("\n");
+EndOpenMP();
+ }
void Unparse(const OpenMPRequiresC
[llvm-branch-commits] [llvm] [mlir] [OpenMP][OMPIRBuilder] Use device shared memory for arg structures (PR #150925)
https://github.com/skatrak updated
https://github.com/llvm/llvm-project/pull/150925
>From 688b61435b38e8632ab81e9aa94fadb5aa5ad7f1 Mon Sep 17 00:00:00 2001
From: Sergio Afonso
Date: Thu, 3 Jul 2025 16:47:51 +0100
Subject: [PATCH 1/4] [OpenMP][OMPIRBuilder] Use device shared memory for arg
structures
Argument structures are created when sections of the LLVM IR corresponding to
an OpenMP construct are outlined into their own function. For this, stack
allocations are used.
This patch modifies this behavior when compiling for a target device and
outlining `parallel`-related IR, so that it uses device shared memory instead
of private stack space. This is needed in order for threads to have access to
these arguments.
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 6 ++
.../llvm/Transforms/Utils/CodeExtractor.h | 34 ++-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 98 +--
llvm/lib/Transforms/Utils/CodeExtractor.cpp | 73 ++
.../LLVMIR/omptarget-parallel-llvm.mlir | 10 +-
5 files changed, 187 insertions(+), 34 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 0fb664aa5f888..90740e0f4fad0 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2244,7 +2244,13 @@ class OpenMPIRBuilder {
/// during finalization.
struct OutlineInfo {
using PostOutlineCBTy = std::function;
+using CustomArgAllocatorCBTy = std::function;
+using CustomArgDeallocatorCBTy = std::function;
PostOutlineCBTy PostOutlineCB;
+CustomArgAllocatorCBTy CustomArgAllocatorCB;
+CustomArgDeallocatorCBTy CustomArgDeallocatorCB;
BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
SmallVector ExcludeArgsFromAggregate;
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 407eb50d2c7a3..d72f697cda992 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/Support/Compiler.h"
#include
@@ -24,7 +25,6 @@ namespace llvm {
template class SmallPtrSetImpl;
class AllocaInst;
-class BasicBlock;
class BlockFrequency;
class BlockFrequencyInfo;
class BranchProbabilityInfo;
@@ -85,6 +85,10 @@ class CodeExtractorAnalysisCache {
/// 3) Add allocas for any scalar outputs, adding all of the outputs' allocas
///as arguments, and inserting stores to the arguments for any scalars.
class CodeExtractor {
+using CustomArgAllocatorCBTy = std::function;
+using CustomArgDeallocatorCBTy = std::function;
using ValueSet = SetVector;
// Various bits of state computed on construction.
@@ -133,6 +137,25 @@ class CodeExtractorAnalysisCache {
// space.
bool ArgsInZeroAddressSpace;
+// If set, this callback will be used to allocate the arguments in the
+// caller before passing it to the outlined function holding the extracted
+// piece of code.
+CustomArgAllocatorCBTy *CustomArgAllocatorCB;
+
+// A block outside of the extraction set where previously introduced
+// intermediate allocations can be deallocated. This is only used when a
+// custom deallocator is specified.
+BasicBlock *DeallocationBlock;
+
+// If set, this callback will be used to deallocate the arguments in the
+// caller after running the outlined function holding the extracted piece of
+// code. It will not be called if a custom allocator isn't also present.
+//
+// By default, this will be done at the end of the basic block containing
+// the call to the outlined function, except if a deallocation block is
+// specified. In that case, that will take precedence.
+CustomArgDeallocatorCBTy *CustomArgDeallocatorCB;
+
public:
/// Create a code extractor for a sequence of blocks.
///
@@ -149,7 +172,9 @@ class CodeExtractorAnalysisCache {
/// the function from which the code is being extracted.
/// If ArgsInZeroAddressSpace param is set to true, then the aggregate
/// param pointer of the outlined function is declared in zero address
-/// space.
+/// space. If a CustomArgAllocatorCB callback is specified, it will be used
+/// to allocate any structures or variable copies needed to pass arguments
+/// to the outlined function, rather than using regular allocas.
LLVM_ABI
CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr,
bool AggregateArgs = false, BlockFrequencyInfo *BFI =
nullptr,
@@ -157,7 +182,10 @@ class CodeExtractorAnalysisCache {
AssumptionCache *AC = nullptr, bool AllowVarArgs = false,
bool AllowAlloca = false,
BasicBlock *Allocat
[llvm-branch-commits] [llvm] [mlir] [OpenMP][OMPIRBuilder] Support parallel in Generic kernels (PR #150926)
https://github.com/skatrak updated
https://github.com/llvm/llvm-project/pull/150926
>From bd815722923fad252e5f41910343ef03f6f92883 Mon Sep 17 00:00:00 2001
From: Sergio Afonso
Date: Fri, 4 Jul 2025 16:32:03 +0100
Subject: [PATCH 1/2] [OpenMP][OMPIRBuilder] Support parallel in Generic
kernels
This patch introduces codegen logic to produce a wrapper function argument for
the `__kmpc_parallel_51` DeviceRTL function needed to handle arguments passed
using device shared memory in Generic mode.
---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 100 --
.../LLVMIR/omptarget-parallel-llvm.mlir | 25 -
2 files changed, 116 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 83cb21b54394b..33a9c8d114cb6 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1406,6 +1406,86 @@ Error OpenMPIRBuilder::emitCancelationCheckImpl(
return Error::success();
}
+// Create wrapper function used to gather the outlined function's argument
+// structure from a shared buffer and to forward them to it when running in
+// Generic mode.
+//
+// The outlined function is expected to receive 2 integer arguments followed by
+// an optional pointer argument to an argument structure holding the rest.
+static Function *createTargetParallelWrapper(OpenMPIRBuilder *OMPIRBuilder,
+ Function &OutlinedFn) {
+ size_t NumArgs = OutlinedFn.arg_size();
+ assert((NumArgs == 2 || NumArgs == 3) &&
+ "expected a 2-3 argument parallel outlined function");
+ bool UseArgStruct = NumArgs == 3;
+
+ IRBuilder<> &Builder = OMPIRBuilder->Builder;
+ IRBuilder<>::InsertPointGuard IPG(Builder);
+ auto *FnTy = FunctionType::get(Builder.getVoidTy(),
+ {Builder.getInt16Ty(), Builder.getInt32Ty()},
+ /*isVarArg=*/false);
+ auto *WrapperFn =
+ Function::Create(FnTy, GlobalValue::InternalLinkage,
+ OutlinedFn.getName() + ".wrapper", OMPIRBuilder->M);
+
+ WrapperFn->addParamAttr(0, Attribute::NoUndef);
+ WrapperFn->addParamAttr(0, Attribute::ZExt);
+ WrapperFn->addParamAttr(1, Attribute::NoUndef);
+
+ BasicBlock *EntryBB =
+ BasicBlock::Create(OMPIRBuilder->M.getContext(), "entry", WrapperFn);
+ Builder.SetInsertPoint(EntryBB);
+
+ // Allocation.
+ Value *AddrAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
+ /*ArraySize=*/nullptr, "addr");
+ AddrAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ AddrAlloca, Builder.getPtrTy(/*AddrSpace=*/0),
+ AddrAlloca->getName() + ".ascast");
+
+ Value *ZeroAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
+ /*ArraySize=*/nullptr, "zero");
+ ZeroAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ ZeroAlloca, Builder.getPtrTy(/*AddrSpace=*/0),
+ ZeroAlloca->getName() + ".ascast");
+
+ Value *ArgsAlloca = nullptr;
+ if (UseArgStruct) {
+ArgsAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
+ /*ArraySize=*/nullptr, "global_args");
+ArgsAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ArgsAlloca, Builder.getPtrTy(/*AddrSpace=*/0),
+ArgsAlloca->getName() + ".ascast");
+ }
+
+ // Initialization.
+ Builder.CreateStore(WrapperFn->getArg(1), AddrAlloca);
+ Builder.CreateStore(Builder.getInt32(0), ZeroAlloca);
+ if (UseArgStruct) {
+Builder.CreateCall(
+OMPIRBuilder->getOrCreateRuntimeFunctionPtr(
+llvm::omp::RuntimeFunction::OMPRTL___kmpc_get_shared_variables),
+{ArgsAlloca});
+ }
+
+ SmallVector Args{AddrAlloca, ZeroAlloca};
+
+ // Load structArg from global_args.
+ if (UseArgStruct) {
+Value *StructArg = Builder.CreateLoad(Builder.getPtrTy(), ArgsAlloca);
+StructArg = Builder.CreateInBoundsGEP(Builder.getPtrTy(), StructArg,
+ {Builder.getInt64(0)});
+StructArg = Builder.CreateLoad(Builder.getPtrTy(), StructArg, "structArg");
+Args.push_back(StructArg);
+ }
+
+ // Call the outlined function holding the parallel body.
+ Builder.CreateCall(&OutlinedFn, Args);
+ Builder.CreateRetVoid();
+
+ return WrapperFn;
+}
+
// Callback used to create OpenMP runtime calls to support
// omp parallel clause for the device.
// We need to use this callback to replace call to the OutlinedFn in OuterFn
@@ -1415,6 +1495,10 @@ static void targetParallelCallback(
BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition,
Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr,
Value *ThreadID, const SmallVector &ToBeDeleted) {
+ assert(OutlinedFn.arg_size() >= 2 &&
+ "Expected at least tid and bounded tid as arguments");
+ unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;
+
//
[llvm-branch-commits] [llvm] [OpenMPOpt] Make parallel regions reachable from new DeviceRTL loop functions (PR #150927)
https://github.com/skatrak updated
https://github.com/llvm/llvm-project/pull/150927
>From 38a38bb056951bf50c5af7f7562bcbb834259a39 Mon Sep 17 00:00:00 2001
From: Sergio Afonso
Date: Fri, 25 Jul 2025 13:52:11 +0100
Subject: [PATCH 1/2] [OpenMPOpt] Make parallel regions reachable from new
DeviceRTL loop functions
This patch updates the OpenMP optimization pass to know about the new DeviceRTL
functions for loop constructs.
This change marks these functions as potentially containing parallel regions,
which fixes a current bug with the state machine rewrite optimization. It
previously failed to identify parallel regions located inside of the callbacks
passed to these new DeviceRTL functions, causing the resulting code to skip
executing these parallel regions.
As a result, Generic kernels produced by Flang that contain parallel regions
now work properly.
One known related issue not fixed by this patch is that the presence of calls
to these functions will prevent the SPMD-ization of Generic kernels by
OpenMPOpt. Previously, this was due to assuming there was no parallel region.
This is changed by this patch, but instead we now mark it temporarily as
unsupported in an SPMD context. The reason is that, without additional changes,
code intended for the main thread of the team located outside of the parallel
region would not be guarded properly, resulting in race conditions and
generally invalid behavior.
---
llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 22 +++
.../fortran/target-generic-loops.f90 | 130 ++
.../offloading/fortran/target-spmd-loops.f90 | 39 ++
3 files changed, 191 insertions(+)
create mode 100644 offload/test/offloading/fortran/target-generic-loops.f90
create mode 100644 offload/test/offloading/fortran/target-spmd-loops.f90
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 5e2247f2a88d0..d58da7b1db0e3 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -5020,6 +5020,28 @@ struct AAKernelInfoCallSite : AAKernelInfo {
case OMPRTL___kmpc_free_shared:
// Return without setting a fixpoint, to be resolved in updateImpl.
return;
+ case OMPRTL___kmpc_distribute_static_loop_4:
+ case OMPRTL___kmpc_distribute_static_loop_4u:
+ case OMPRTL___kmpc_distribute_static_loop_8:
+ case OMPRTL___kmpc_distribute_static_loop_8u:
+ case OMPRTL___kmpc_distribute_for_static_loop_4:
+ case OMPRTL___kmpc_distribute_for_static_loop_4u:
+ case OMPRTL___kmpc_distribute_for_static_loop_8:
+ case OMPRTL___kmpc_distribute_for_static_loop_8u:
+ case OMPRTL___kmpc_for_static_loop_4:
+ case OMPRTL___kmpc_for_static_loop_4u:
+ case OMPRTL___kmpc_for_static_loop_8:
+ case OMPRTL___kmpc_for_static_loop_8u:
+// Parallel regions might be reached by these calls, as they take a
+// callback argument potentially containing arbitrary user-provided code.
+ReachedUnknownParallelRegions.insert(&CB);
+// TODO: The presence of these calls on their own does not prevent a
+// kernel from being SPMD-izable. We mark it as such because we need
+// further changes in order to also consider the contents of the
+// callbacks passed to them.
+SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+SPMDCompatibilityTracker.insert(&CB);
+break;
default:
// Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
// generally. However, they do not hide parallel regions.
diff --git a/offload/test/offloading/fortran/target-generic-loops.f90
b/offload/test/offloading/fortran/target-generic-loops.f90
new file mode 100644
index 0..07bcbfd2c8752
--- /dev/null
+++ b/offload/test/offloading/fortran/target-generic-loops.f90
@@ -0,0 +1,130 @@
+! Offloading test for generic target regions containing different kinds of
+! loop constructs inside.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer :: i1, i2, n1, n2, counter
+
+ n1 = 100
+ n2 = 50
+
+ counter = 0
+ !$omp target map(tofrom:counter)
+!$omp teams distribute reduction(+:counter)
+do i1=1, n1
+ counter = counter + 1
+end do
+ !$omp end target
+
+ ! CHECK: 1 100
+ print '(I2" "I0)', 1, counter
+
+ counter = 0
+ !$omp target map(tofrom:counter)
+!$omp parallel do reduction(+:counter)
+do i1=1, n1
+ counter = counter + 1
+end do
+!$omp parallel do reduction(+:counter)
+do i1=1, n1
+ counter = counter + 1
+end do
+ !$omp end target
+
+ ! CHECK: 2 200
+ print '(I2" "I0)', 2, counter
+
+ counter = 0
+ !$omp target map(tofrom:counter)
+counter = counter + 1
+!$omp parallel do reduction(+:counter)
+do i1=1, n1
+ counter = counter + 1
+end do
+counter = counter + 1
+!$omp parallel do reduction(+:counter)
+do i1=1, n1
+ counte
[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/153926 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)
llvmbot wrote:
@llvm/pr-subscribers-mc
Author: None (llvmbot)
Changes
Backport 76d993bd25ff462d915f69772454e7b1ca42fdb8
Requested by: @androm3da
---
Full diff: https://github.com/llvm/llvm-project/pull/153926.diff
2 Files Affected:
- (modified) llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp (+3)
- (modified) llvm/test/MC/Hexagon/system-inst.s (+3)
``diff
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 22cff7c80fa01..bcddb540d35dc 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -526,6 +526,9 @@ DecodeStatus
HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
MI.insert(MI.begin() + 1,
MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
break;
+ case Hexagon::Y4_crswap10:
+MI.addOperand(MCOperand::createReg(Hexagon::SGP1_0));
+break;
default:
break;
}
diff --git a/llvm/test/MC/Hexagon/system-inst.s
b/llvm/test/MC/Hexagon/system-inst.s
index 7bc1533598532..07f7ca0acb2dc 100644
--- a/llvm/test/MC/Hexagon/system-inst.s
+++ b/llvm/test/MC/Hexagon/system-inst.s
@@ -89,6 +89,9 @@ crswap(r12,sgp0)
#CHECK: 652dc000 { crswap(r13,sgp1) }
crswap(r13,sgp1)
+#CHECK: 6d8ec000 { crswap(r15:14,s1:0) }
+crswap(r15:14,sgp1:0)
+
#CHECK: 660fc00e { r14 = getimask(r15) }
r14=getimask(r15)
``
https://github.com/llvm/llvm-project/pull/153926
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)
llvmbot wrote:
@llvm/pr-subscribers-backend-hexagon
Author: None (llvmbot)
Changes
Backport 76d993bd25ff462d915f69772454e7b1ca42fdb8
Requested by: @androm3da
---
Full diff: https://github.com/llvm/llvm-project/pull/153926.diff
2 Files Affected:
- (modified) llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp (+3)
- (modified) llvm/test/MC/Hexagon/system-inst.s (+3)
```diff
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 22cff7c80fa01..bcddb540d35dc 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -526,6 +526,9 @@ DecodeStatus
HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
MI.insert(MI.begin() + 1,
MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
break;
+ case Hexagon::Y4_crswap10:
+MI.addOperand(MCOperand::createReg(Hexagon::SGP1_0));
+break;
default:
break;
}
diff --git a/llvm/test/MC/Hexagon/system-inst.s
b/llvm/test/MC/Hexagon/system-inst.s
index 7bc1533598532..07f7ca0acb2dc 100644
--- a/llvm/test/MC/Hexagon/system-inst.s
+++ b/llvm/test/MC/Hexagon/system-inst.s
@@ -89,6 +89,9 @@ crswap(r12,sgp0)
#CHECK: 652dc000 { crswap(r13,sgp1) }
crswap(r13,sgp1)
+#CHECK: 6d8ec000 { crswap(r15:14,s1:0) }
+crswap(r15:14,sgp1:0)
+
#CHECK: 660fc00e { r14 = getimask(r15) }
r14=getimask(r15)
```
https://github.com/llvm/llvm-project/pull/153926
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)
llvmbot wrote: @quic-akaryaki What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/153926 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add missing operand when disassembling Y4_crswap10 (#153849) (PR #153926)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/153926
Backport 76d993bd25ff462d915f69772454e7b1ca42fdb8
Requested by: @androm3da
>From 27a00648607f22b4b2d1de4adb72fe6364a7ef88 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov
Date: Sat, 16 Aug 2025 05:13:43 +0300
Subject: [PATCH] [Hexagon] Add missing operand when disassembling Y4_crswap10
(#153849)
Auto-generated decoder fails to add the $sgp10 operand because it has no
encoding bits.
Work around this by adding the missing operand after decoding is
complete.
Fixes #153829.
(cherry picked from commit 76d993bd25ff462d915f69772454e7b1ca42fdb8)
---
llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 3 +++
llvm/test/MC/Hexagon/system-inst.s | 3 +++
2 files changed, 6 insertions(+)
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 22cff7c80fa01..bcddb540d35dc 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -526,6 +526,9 @@ DecodeStatus
HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
MI.insert(MI.begin() + 1,
MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
break;
+ case Hexagon::Y4_crswap10:
+MI.addOperand(MCOperand::createReg(Hexagon::SGP1_0));
+break;
default:
break;
}
diff --git a/llvm/test/MC/Hexagon/system-inst.s
b/llvm/test/MC/Hexagon/system-inst.s
index 7bc1533598532..07f7ca0acb2dc 100644
--- a/llvm/test/MC/Hexagon/system-inst.s
+++ b/llvm/test/MC/Hexagon/system-inst.s
@@ -89,6 +89,9 @@ crswap(r12,sgp0)
#CHECK: 652dc000 { crswap(r13,sgp1) }
crswap(r13,sgp1)
+#CHECK: 6d8ec000 { crswap(r15:14,s1:0) }
+crswap(r15:14,sgp1:0)
+
#CHECK: 660fc00e { r14 = getimask(r15) }
r14=getimask(r15)
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
