[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/151307
Backport 75b79c9238bc083cdff2d2364be40633fdf4d1ad
Requested by: @e-kud
>From c325db48499ba6332b6d598db489ec1804d4506f Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov
Date: Mon, 28 Jul 2025 17:45:16 +0200
Subject: [PATCH] [LLD][X86] Match delayLoad thunk with MSVC (#149521)
Previously we saved registers in the shadow space of the callee before
calling __delayLoadHelper2. Now we save the arguments in the shadow space of
the caller and allocate shadow space for the callee.
Fixes #51941
-
Co-authored-by: Benjamin Santerre
(cherry picked from commit 75b79c9238bc083cdff2d2364be40633fdf4d1ad)
---
lld/COFF/DLL.cpp | 58 +++
lld/test/COFF/arm64ec-delayimport.test | 48 ++---
lld/test/COFF/arm64x-delayimport.test | 98 +-
lld/test/COFF/delayimports.test| 14 ++--
lld/test/COFF/delayimporttables.yaml | 8 +--
lld/test/COFF/giats.s | 4 +-
6 files changed, 111 insertions(+), 119 deletions(-)
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index c327da28ce138..3ce8853adb2a2 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -244,40 +244,36 @@ static const uint8_t thunkX64[] = {
};
static const uint8_t tailMergeX64[] = {
-0x51, // pushrcx
-0x52, // pushrdx
-0x41, 0x50, // pushr8
-0x41, 0x51, // pushr9
-0x48, 0x83, 0xEC, 0x48, // sub rsp, 48h
-0x66, 0x0F, 0x7F, 0x04, 0x24, // movdqa xmmword ptr [rsp], xmm0
-0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa xmmword ptr [rsp+10h], xmm1
-0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa xmmword ptr [rsp+20h], xmm2
-0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa xmmword ptr [rsp+30h], xmm3
-0x48, 0x8B, 0xD0, // mov rdx, rax
-0x48, 0x8D, 0x0D, 0, 0, 0, 0, // lea rcx, [___DELAY_IMPORT_...]
-0xE8, 0, 0, 0, 0, // call__delayLoadHelper2
-0x66, 0x0F, 0x6F, 0x04, 0x24, // movdqa xmm0, xmmword ptr [rsp]
-0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x10, // movdqa xmm1, xmmword ptr [rsp+10h]
-0x66, 0x0F, 0x6F, 0x54, 0x24, 0x20, // movdqa xmm2, xmmword ptr [rsp+20h]
-0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x30, // movdqa xmm3, xmmword ptr [rsp+30h]
-0x48, 0x83, 0xC4, 0x48, // add rsp, 48h
-0x41, 0x59, // pop r9
-0x41, 0x58, // pop r8
-0x5A, // pop rdx
-0x59, // pop rcx
-0xFF, 0xE0, // jmp rax
+0x48, 0x89, 0x4C, 0x24, 0x08, // movqword ptr [rsp+8], rcx
+0x48, 0x89, 0x54, 0x24, 0x10, // movqword ptr [rsp+10h], rdx
+0x4C, 0x89, 0x44, 0x24, 0x18, // movqword ptr [rsp+18h], r8
+0x4C, 0x89, 0x4C, 0x24, 0x20, // movqword ptr [rsp+20h], r9
+0x48, 0x83, 0xEC, 0x68,// subrsp, 68h
+0x66, 0x0F, 0x7F, 0x44, 0x24, 0x20,// movdqa xmmword ptr [rsp+20h],
xmm0
+0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x30,// movdqa xmmword ptr [rsp+30h],
xmm1
+0x66, 0x0F, 0x7F, 0x54, 0x24, 0x40,// movdqa xmmword ptr [rsp+40h],
xmm2
+0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x50,// movdqa xmmword ptr [rsp+50h],
xmm3
+0x48, 0x8B, 0xD0, // movrdx, rax
+0x48, 0x8D, 0x0D, 0, 0, 0, 0, // learcx, [___DELAY_IMPORT_...]
+0xE8, 0, 0, 0, 0, // call __delayLoadHelper2
+0x66, 0x0F, 0x6F, 0x44, 0x24, 0x20,// movdqa xmm0, xmmword ptr
[rsp+20h]
+0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x30,// movdqa xmm1, xmmword ptr
[rsp+30h]
+0x66, 0x0F, 0x6F, 0x54, 0x24, 0x40,// movdqa xmm2, xmmword ptr
[rsp+40h]
+0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x50,// movdqa xmm3, xmmword ptr
[rsp+50h]
+0x48, 0x8B, 0x4C, 0x24, 0x70, // movrcx, qword ptr [rsp+70h]
+0x48, 0x8B, 0x54, 0x24, 0x78, // movrdx, qword ptr [rsp+78h]
+0x4C, 0x8B, 0x84, 0x24, 0x80, 0, 0, 0, // movr8, qword ptr [rsp+80h]
+0x4C, 0x8B, 0x8C, 0x24, 0x88, 0, 0, 0, // movr9, qword ptr [rsp+88h]
+0x48, 0x83, 0xC4, 0x68,// addrsp, 68h
+0xFF, 0xE0,// jmprax
};
static const uint8_t tailMergeUnwindInfoX64[] = {
0x01, // Version=1, Flags=UNW_FLAG_NHANDLER
-0x0a, // Size of prolog
-0x05, // Count of unwind codes
+0x18, // Size of prolog
+0x01, // Count of unwind codes
0x00, // No frame register
-0x0a, 0x82, // Offset 0xa: UWOP_ALLOC_SMALL(0x48)
-0x06, 0x02, // Offset 6: UWOP_ALLOC_SMALL(8)
-0x04, 0x02, // Offset 4: UWOP_ALLOC_SMALL(8)
-0x02, 0x02, // Offset 2: UWOP_ALLOC_SMALL(8)
-
[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/151307 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)
llvmbot wrote: @cjacek What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/151307 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)
llvmbot wrote:
@llvm/pr-subscribers-lld
Author: None (llvmbot)
Changes
Backport 75b79c9238bc083cdff2d2364be40633fdf4d1ad
Requested by: @e-kud
---
Patch is 22.51 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/151307.diff
6 Files Affected:
- (modified) lld/COFF/DLL.cpp (+27-31)
- (modified) lld/test/COFF/arm64ec-delayimport.test (+24-24)
- (modified) lld/test/COFF/arm64x-delayimport.test (+49-49)
- (modified) lld/test/COFF/delayimports.test (+5-9)
- (modified) lld/test/COFF/delayimporttables.yaml (+4-4)
- (modified) lld/test/COFF/giats.s (+2-2)
```diff
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index c327da28ce138..3ce8853adb2a2 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -244,40 +244,36 @@ static const uint8_t thunkX64[] = {
};
static const uint8_t tailMergeX64[] = {
-0x51, // pushrcx
-0x52, // pushrdx
-0x41, 0x50, // pushr8
-0x41, 0x51, // pushr9
-0x48, 0x83, 0xEC, 0x48, // sub rsp, 48h
-0x66, 0x0F, 0x7F, 0x04, 0x24, // movdqa xmmword ptr [rsp], xmm0
-0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa xmmword ptr [rsp+10h], xmm1
-0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa xmmword ptr [rsp+20h], xmm2
-0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa xmmword ptr [rsp+30h], xmm3
-0x48, 0x8B, 0xD0, // mov rdx, rax
-0x48, 0x8D, 0x0D, 0, 0, 0, 0, // lea rcx, [___DELAY_IMPORT_...]
-0xE8, 0, 0, 0, 0, // call__delayLoadHelper2
-0x66, 0x0F, 0x6F, 0x04, 0x24, // movdqa xmm0, xmmword ptr [rsp]
-0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x10, // movdqa xmm1, xmmword ptr [rsp+10h]
-0x66, 0x0F, 0x6F, 0x54, 0x24, 0x20, // movdqa xmm2, xmmword ptr [rsp+20h]
-0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x30, // movdqa xmm3, xmmword ptr [rsp+30h]
-0x48, 0x83, 0xC4, 0x48, // add rsp, 48h
-0x41, 0x59, // pop r9
-0x41, 0x58, // pop r8
-0x5A, // pop rdx
-0x59, // pop rcx
-0xFF, 0xE0, // jmp rax
+0x48, 0x89, 0x4C, 0x24, 0x08, // movqword ptr [rsp+8], rcx
+0x48, 0x89, 0x54, 0x24, 0x10, // movqword ptr [rsp+10h], rdx
+0x4C, 0x89, 0x44, 0x24, 0x18, // movqword ptr [rsp+18h], r8
+0x4C, 0x89, 0x4C, 0x24, 0x20, // movqword ptr [rsp+20h], r9
+0x48, 0x83, 0xEC, 0x68,// subrsp, 68h
+0x66, 0x0F, 0x7F, 0x44, 0x24, 0x20,// movdqa xmmword ptr [rsp+20h],
xmm0
+0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x30,// movdqa xmmword ptr [rsp+30h],
xmm1
+0x66, 0x0F, 0x7F, 0x54, 0x24, 0x40,// movdqa xmmword ptr [rsp+40h],
xmm2
+0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x50,// movdqa xmmword ptr [rsp+50h],
xmm3
+0x48, 0x8B, 0xD0, // movrdx, rax
+0x48, 0x8D, 0x0D, 0, 0, 0, 0, // learcx, [___DELAY_IMPORT_...]
+0xE8, 0, 0, 0, 0, // call __delayLoadHelper2
+0x66, 0x0F, 0x6F, 0x44, 0x24, 0x20,// movdqa xmm0, xmmword ptr
[rsp+20h]
+0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x30,// movdqa xmm1, xmmword ptr
[rsp+30h]
+0x66, 0x0F, 0x6F, 0x54, 0x24, 0x40,// movdqa xmm2, xmmword ptr
[rsp+40h]
+0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x50,// movdqa xmm3, xmmword ptr
[rsp+50h]
+0x48, 0x8B, 0x4C, 0x24, 0x70, // movrcx, qword ptr [rsp+70h]
+0x48, 0x8B, 0x54, 0x24, 0x78, // movrdx, qword ptr [rsp+78h]
+0x4C, 0x8B, 0x84, 0x24, 0x80, 0, 0, 0, // movr8, qword ptr [rsp+80h]
+0x4C, 0x8B, 0x8C, 0x24, 0x88, 0, 0, 0, // movr9, qword ptr [rsp+88h]
+0x48, 0x83, 0xC4, 0x68,// addrsp, 68h
+0xFF, 0xE0,// jmprax
};
static const uint8_t tailMergeUnwindInfoX64[] = {
0x01, // Version=1, Flags=UNW_FLAG_NHANDLER
-0x0a, // Size of prolog
-0x05, // Count of unwind codes
+0x18, // Size of prolog
+0x01, // Count of unwind codes
0x00, // No frame register
-0x0a, 0x82, // Offset 0xa: UWOP_ALLOC_SMALL(0x48)
-0x06, 0x02, // Offset 6: UWOP_ALLOC_SMALL(8)
-0x04, 0x02, // Offset 4: UWOP_ALLOC_SMALL(8)
-0x02, 0x02, // Offset 2: UWOP_ALLOC_SMALL(8)
-0x01, 0x02, // Offset 1: UWOP_ALLOC_SMALL(8)
+0x18, 0xC2, // Offset 0x18: UWOP_ALLOC_SMALL(0x68)
0x00, 0x00 // Padding to align on 32-bits
};
@@ -378,8 +374,8 @@ class TailMergeChunkX64 : public NonSectionCodeChunk {
void writeTo(uint8_t *buf) const override {
memcpy(buf, tailMergeX64, sizeof(tailMergeX64));
-write32le(buf + 39, desc->getRVA() - rva - 43);
-write32le(buf + 44, helper->getRVA() - rva - 48);
+write32le
[llvm-branch-commits] [llvm] release/21.x: [AArch64] Keep floating-point conversion in SIMD (#147707) (PR #151317)
guy-david wrote: This optimization is important for us to land in this release, since it affects internal workloads we care about. It's rather conservative and concerns a very specific pattern around floating-point conversions in the AArch64 backend. Safety is above all, so we should definitely wait for the more expensive bots to complete their verification. https://github.com/llvm/llvm-project/pull/151317 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)
https://github.com/snehasish edited https://github.com/llvm/llvm-project/pull/148013 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)
https://github.com/snehasish approved this pull request. lgtm. It would be good to get an approval from llvm-profgen owners. cc: @WenleiHe @wlei-llvm https://github.com/llvm/llvm-project/pull/148013 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)
@@ -1027,6 +1027,20 @@ class FunctionSamples {
return VirtualCallsiteTypeCounts[mapIRLocToProfileLoc(Loc)];
}
+ /// At location \p Loc, add a type sample for the given \p Type with
+ /// \p Count. This function uses saturating arithmetic to clamp the result to
+ /// maximum uint64_t (the counter type) and returns counter_overflow to
caller
+ /// if the actual result is larger than maximum uint64_t.
+ sampleprof_error addTypeSamplesAt(const LineLocation &Loc, FunctionId Type,
+uint64_t Count) {
+auto &TypeCounts = getTypeSamplesAt(Loc);
+bool Overflowed = false;
+TypeCounts[Type] = SaturatingMultiplyAdd(Count, /* Weight= */ (uint64_t)1,
snehasish wrote:
I don't think we should insert into the map if it overflowed. Can you check the
overflow first and then insert?
https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)
@@ -344,6 +350,36 @@ void
ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile &Obj,
exitWithError("no executable segment found", FileName);
}
+uint64_t ProfiledBinary::CanonicalizeNonTextAddress(uint64_t Address) {
+ uint64_t FileOffset = 0;
+ auto MMapIter = NonTextMMapEvents.lower_bound(Address);
+ if (MMapIter == NonTextMMapEvents.end())
+return Address; // No non-text mmap event found, return the address as is.
+
+ const auto &MMapEvent = MMapIter->second;
+
+ // If the address is within the non-text mmap event, calculates its file
snehasish wrote:
typo: s/calculates/calculate/
https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)
@@ -611,23 +635,11 @@ class PerfScriptReader : public PerfReaderBase {
static SmallVector TempFileCleanups;
protected:
- // The parsed MMap event
- struct MMapEvent {
-int64_t PID = 0;
-uint64_t Address = 0;
-uint64_t Size = 0;
-uint64_t Offset = 0;
-StringRef BinaryPath;
- };
-
// Check whether a given line is LBR sample
static bool isLBRSample(StringRef Line);
// Check whether a given line is MMAP event
static bool isMMapEvent(StringRef Line);
- // Parse a single line of a PERF_RECORD_MMAP event looking for a
- // mapping between the binary name and its memory layout.
- static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
-MMapEvent &MMap);
+
snehasish wrote:
nit: extra newline? The existing code doesn't have new lines between decls.
https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)
@@ -946,6 +978,14 @@ SampleContextFrameVector ProfiledBinary::symbolize(const
InstructionPointer &IP,
return CallStack;
}
+StringRef ProfiledBinary::symbolizeDataAddress(uint64_t Address) {
+ DIGlobal DataDIGlobal = unwrapOrError(
+ Symbolizer->symbolizeData(SymbolizerPath.str(), {Address, 0}),
snehasish wrote:
Thanks for the detailed followup.
https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][ELF] Merge AgainstSymbol and AgainstSymbolWithTargetVA (PR #150798)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150798 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] dc6171d - Revert "[mlir][spirv] Fix UpdateVCEPass to deduce the correct set of capabili…"
Author: Igor Wodiany
Date: 2025-07-30T17:06:21+01:00
New Revision: dc6171d58e26aba18bb367a73a2e8a7cbf0e1f22
URL:
https://github.com/llvm/llvm-project/commit/dc6171d58e26aba18bb367a73a2e8a7cbf0e1f22
DIFF:
https://github.com/llvm/llvm-project/commit/dc6171d58e26aba18bb367a73a2e8a7cbf0e1f22.diff
LOG: Revert "[mlir][spirv] Fix UpdateVCEPass to deduce the correct set of
capabili…"
This reverts commit 3d4f1fee48689465b5026f75414247307db7d34d.
Added:
Modified:
mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp
mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir
Removed:
diff --git a/mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp
b/mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp
index 9b1c84ee66156..6a9b951ca61d6 100644
--- a/mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp
+++ b/mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp
@@ -95,13 +95,6 @@ static LogicalResult checkAndUpdateCapabilityRequirements(
return success();
}
-static void addAllImpliedCapabilities(SetVector &caps) {
- for (spirv::Capability cap : caps) {
-ArrayRef impliedCaps =
getDirectImpliedCapabilities(cap);
-caps.insert_range(impliedCaps);
- }
-}
-
void UpdateVCEPass::runOnOperation() {
spirv::ModuleOp module = getOperation();
@@ -175,8 +168,6 @@ void UpdateVCEPass::runOnOperation() {
return WalkResult::interrupt();
}
-addAllImpliedCapabilities(deducedCapabilities);
-
return WalkResult::advance();
});
diff --git a/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir
b/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir
index d657633665876..2b237665ffc4a 100644
--- a/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir
+++ b/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir
@@ -7,7 +7,7 @@
// Test deducing minimal version.
// spirv.IAdd is available from v1.0.
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
spirv.module Logical GLSL450 attributes {
spirv.target_env = #spirv.target_env<
#spirv.vce, #spirv.resource_limits<>>
@@ -21,7 +21,7 @@ spirv.module Logical GLSL450 attributes {
// Test deducing minimal version.
// spirv.GroupNonUniformBallot is available since v1.3.
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
spirv.module Logical GLSL450 attributes {
spirv.target_env = #spirv.target_env<
#spirv.vce,
#spirv.resource_limits<>>
@@ -32,7 +32,7 @@ spirv.module Logical GLSL450 attributes {
}
}
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
spirv.module Logical GLSL450 attributes {
spirv.target_env = #spirv.target_env<#spirv.vce,
#spirv.resource_limits<>>
} {
@@ -48,7 +48,7 @@ spirv.module Logical GLSL450 attributes {
// Test minimal capabilities.
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
spirv.module Logical GLSL450 attributes {
spirv.target_env = #spirv.target_env<
#spirv.vce, #spirv.resource_limits<>>
@@ -61,10 +61,10 @@ spirv.module Logical GLSL450 attributes {
// Test Physical Storage Buffers are deduced correctly.
-// CHECK: spirv.module PhysicalStorageBuffer64 GLSL450 requires
#spirv.vce
+// CHECK: spirv.module PhysicalStorageBuffer64 GLSL450 requires
#spirv.vce
spirv.module PhysicalStorageBuffer64 GLSL450 attributes {
spirv.target_env = #spirv.target_env<
-#spirv.vce, #spirv.resource_limits<>>
+#spirv.vce, #spirv.resource_limits<>>
} {
spirv.func @physical_ptr(%val : !spirv.ptr {
spirv.decoration = #spirv.decoration }) "None" {
spirv.Return
@@ -74,7 +74,7 @@ spirv.module PhysicalStorageBuffer64 GLSL450 attributes {
// Test deducing implied capability.
// AtomicStorage implies Shader.
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
spirv.module Logical GLSL450 attributes {
spirv.target_env = #spirv.target_env<
#spirv.vce, #spirv.resource_limits<>>
@@ -95,7 +95,7 @@ spirv.module Logical GLSL450 attributes {
// * GroupNonUniformArithmetic
// * GroupNonUniformBallot
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
spirv.module Logical GLSL450 attributes {
spirv.target_env = #spirv.target_env<
#spirv.vce,
#spirv.resource_limits<>>
@@ -106,7 +106,7 @@ spirv.module Logical GLSL450 attributes {
}
}
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
spirv.module Logical GLSL450 attributes {
spirv.target_env = #spirv.target_env<
#spirv.vce, #spirv.resource_limits<>>
@@ -120,7 +120,7 @@ spirv.module Logical GLSL450 attributes {
// Test type required capabilities
// Using 8-bit integers in non-interface storage class requires Int8.
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
spirv.module Logical GLSL450 attributes {
spirv.target_env = #spirv.target_env<
#spirv.vce, #spirv.resource_limits<>>
@@ -132,7 +132,7 @@ spirv.module Logical GLSL450 attributes {
}
// Using 16-bit floats in non-interface storag
[llvm-branch-commits] [NFCI][ELF][Mips] Replace MipsMultiGotPage with new RE_MIPS_OSEC_LOCAL_PAGE (PR #150810)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150810 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][ELF][Mips] Replace MipsMultiGotPage with new RE_MIPS_OSEC_LOCAL_PAGE (PR #150810)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150810 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [NFC][ELF] Don't duplicate DynamicReloc constructor (PR #150811)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150811 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF] Don't duplicate DynamicReloc constructor (PR #150811)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150811 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][ELF] Store DynamicReloc Kind as two bools (PR #150812)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150812 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF] Replace DynamicReloc::Kind with the equivalent bool in APIs (PR #150813)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150813 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][ELF] Store DynamicReloc Kind as two bools (PR #150812)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150812 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][ELF] Replace DynamicReloc::Kind with the equivalent bool in APIs (PR #150813)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150813 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [NFCI][ELF] Introduce explicit Computed state for DynamicReloc (PR #150799)
https://github.com/jrtc27 updated
https://github.com/llvm/llvm-project/pull/150799
>From 1308e1aad30d7089f658832150854b1362c63f45 Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Sat, 26 Jul 2025 22:05:06 +0100
Subject: [PATCH] [𝘀𝗽𝗿] changes to main this commit is based on
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
[skip ci]
---
lld/ELF/Config.h| 2 ++
lld/ELF/Driver.cpp | 1 +
lld/ELF/Relocations.cpp | 3 +--
lld/ELF/Target.cpp | 3 +--
4 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index d9639b06ca4bf..958e5caaf0dfa 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -701,6 +701,8 @@ struct Ctx : CommonLinkerContext {
std::unique_ptr tar;
// InputFile for linker created symbols with no source location.
InputFile *internalFile = nullptr;
+ // Dummy Undefined for relocations without a symbol.
+ Undefined *dummySym = nullptr;
// True if symbols can be exported (isExported) or preemptible.
bool hasDynsym = false;
// True if SHT_LLVM_SYMPART is used.
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 21d228eda6470..4dcf577ebcb16 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -3138,6 +3138,7 @@ template void
LinkerDriver::link(opt::InputArgList &args) {
ctx.symtab->insert(arg->getValue())->traced = true;
ctx.internalFile = createInternalFile(ctx, "");
+ ctx.dummySym = make(ctx.internalFile, "", STB_LOCAL, 0, 0);
// Handle -u/--undefined before input files. If both a.a and b.so define foo,
// -u foo a.a b.so will extract a.a.
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index bd22fe2f1aa25..e847e85b060fe 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1948,13 +1948,12 @@ void elf::postScanRelocations(Ctx &ctx) {
GotSection *got = ctx.in.got.get();
if (ctx.needsTlsLd.load(std::memory_order_relaxed) && got->addTlsIndex()) {
-static Undefined dummy(ctx.internalFile, "", STB_LOCAL, 0, 0);
if (ctx.arg.shared)
ctx.mainPart->relaDyn->addReloc(
{ctx.target->tlsModuleIndexRel, got, got->getTlsIndexOff()});
else
got->addConstant({R_ADDEND, ctx.target->symbolicRel,
-got->getTlsIndexOff(), 1, &dummy});
+got->getTlsIndexOff(), 1, ctx.dummySym});
}
assert(ctx.symAux.size() == 1);
diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp
index ad7d57d30668d..4946484074d05 100644
--- a/lld/ELF/Target.cpp
+++ b/lld/ELF/Target.cpp
@@ -105,10 +105,9 @@ ErrorPlace elf::getErrorPlace(Ctx &ctx, const uint8_t
*loc) {
if (isecLoc <= loc && loc < isecLoc + isec->getSize()) {
std::string objLoc = isec->getLocation(loc - isecLoc);
// Return object file location and source file location.
- Undefined dummy(ctx.internalFile, "", STB_LOCAL, 0, 0);
ELFSyncStream msg(ctx, DiagLevel::None);
if (isec->file)
-msg << isec->getSrcMsg(dummy, loc - isecLoc);
+msg << isec->getSrcMsg(*ctx.dummySym, loc - isecLoc);
return {isec, objLoc + ": ", std::string(msg.str())};
}
}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][ELF] Merge AddendOnly and AddendOnlyWithTargetVA (PR #150797)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150797 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][ELF] Merge AddendOnly and AddendOnlyWithTargetVA (PR #150797)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150797 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] [ELF][Mips] Fix addend for preemptible static TLS (PR #150729)
https://github.com/jrtc27 updated
https://github.com/llvm/llvm-project/pull/150729
>From 32400cb0d5c16e16b6d0d259955ba060f561fefe Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Sat, 26 Jul 2025 02:12:18 +0100
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
lld/ELF/SyntheticSections.cpp | 16
lld/ELF/SyntheticSections.h | 9 +
lld/test/ELF/mips-mgot.s | 2 +-
lld/test/ELF/mips-tls-64.s| 2 +-
lld/test/ELF/mips-tls.s | 2 +-
5 files changed, 12 insertions(+), 19 deletions(-)
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index efec41a737b62..0bb00c6d2bcff 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1065,9 +1065,8 @@ void MipsGotSection::build() {
// for the TP-relative offset as we don't know how much other data will
// be allocated before us in the static TLS block.
if (s->isPreemptible || ctx.arg.shared)
-ctx.mainPart->relaDyn->addReloc(
-{ctx.target->tlsGotRel, this, offset,
- DynamicReloc::AgainstSymbolWithTargetVA, *s, 0, R_ABS});
+ctx.mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible(
+ctx.target->tlsGotRel, *this, offset, *s, ctx.target->symbolicRel);
}
for (std::pair &p : got.dynTlsSymbols) {
Symbol *s = p.first;
@@ -1160,6 +1159,7 @@ void MipsGotSection::writeTo(uint8_t *buf) {
// if we had to do this.
writeUint(ctx, buf + ctx.arg.wordsize,
(uint64_t)1 << (ctx.arg.wordsize * 8 - 1));
+ ctx.target->relocateAlloc(*this, buf);
for (const FileGot &g : gots) {
auto write = [&](size_t i, const Symbol *s, int64_t a) {
uint64_t va = a;
@@ -1189,9 +1189,10 @@ void MipsGotSection::writeTo(uint8_t *buf) {
write(p.second, p.first, 0);
for (const std::pair &p : g.relocs)
write(p.second, p.first, 0);
-for (const std::pair &p : g.tls)
- write(p.second, p.first,
-p.first->isPreemptible || ctx.arg.shared ? 0 : -0x7000);
+for (const std::pair &p : g.tls) {
+ if (!p.first->isPreemptible && !ctx.arg.shared)
+write(p.second, p.first, -0x7000);
+}
for (const std::pair &p : g.dynTlsSymbols) {
if (p.first == nullptr && !ctx.arg.shared)
write(p.second, nullptr, 1);
@@ -1653,8 +1654,7 @@ int64_t DynamicReloc::computeAddend(Ctx &ctx) const {
case AgainstSymbol:
assert(sym != nullptr);
return addend;
- case AddendOnlyWithTargetVA:
- case AgainstSymbolWithTargetVA: {
+ case AddendOnlyWithTargetVA: {
uint64_t ca = inputSec->getRelocTargetVA(
ctx, Relocation{expr, type, 0, addend, sym}, getOffset());
return ctx.arg.is64 ? ca : SignExtend64<32>(ca);
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 5f01513630597..7612915b5b1dc 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -429,11 +429,6 @@ class DynamicReloc {
/// The resulting dynamic relocation references symbol #sym from the
dynamic
/// symbol table and uses #addend as the value of computeAddend(ctx).
AgainstSymbol,
-/// The resulting dynamic relocation references symbol #sym from the
dynamic
-/// symbol table and uses InputSection::getRelocTargetVA() + #addend for
the
-/// final addend. It can be used for relocations that write the symbol VA
as
-// the addend (e.g. R_MIPS_TLS_TPREL64) but still reference the symbol.
-AgainstSymbolWithTargetVA,
/// This is used by the MIPS multi-GOT implementation. It relocates
/// addresses of 64kb pages that lie inside the output section.
MipsMultiGotPage,
@@ -460,9 +455,7 @@ class DynamicReloc {
uint64_t getOffset() const;
uint32_t getSymIndex(SymbolTableBaseSection *symTab) const;
- bool needsDynSymIndex() const {
-return kind == AgainstSymbol || kind == AgainstSymbolWithTargetVA;
- }
+ bool needsDynSymIndex() const { return kind == AgainstSymbol; }
/// Computes the addend of the dynamic relocation. Note that this is not the
/// same as the #addend member variable as it may also include the symbol
diff --git a/lld/test/ELF/mips-mgot.s b/lld/test/ELF/mips-mgot.s
index 6978b5d9623b4..67bd5e6619f12 100644
--- a/lld/test/ELF/mips-mgot.s
+++ b/lld/test/ELF/mips-mgot.s
@@ -23,7 +23,7 @@
# CHECK: Contents of section .got:
# CHECK-NEXT: 7 8000 [[FOO0]] [[FOO2]]
-# CHECK-NEXT: 70010 0004 0001 0002
+# CHECK-NEXT: 70010 0001 0002
# CHECK-NEXT: 70020 0003 0004 0005 0006
# CHECK-NEXT: 70030
# CHECK-NEXT: 70040
diff --git a/lld/test/ELF/mips-tls-64.s b/lld/test/ELF/mips-tls-64.s
index 3976b50274be4..8a00b93c77e2f 100644
--- a/ll
[llvm-branch-commits] [lld] [ELF][Mips] Fix addend for preemptible static TLS (PR #150729)
https://github.com/jrtc27 updated
https://github.com/llvm/llvm-project/pull/150729
>From 32400cb0d5c16e16b6d0d259955ba060f561fefe Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Sat, 26 Jul 2025 02:12:18 +0100
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
---
lld/ELF/SyntheticSections.cpp | 16
lld/ELF/SyntheticSections.h | 9 +
lld/test/ELF/mips-mgot.s | 2 +-
lld/test/ELF/mips-tls-64.s| 2 +-
lld/test/ELF/mips-tls.s | 2 +-
5 files changed, 12 insertions(+), 19 deletions(-)
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index efec41a737b62..0bb00c6d2bcff 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1065,9 +1065,8 @@ void MipsGotSection::build() {
// for the TP-relative offset as we don't know how much other data will
// be allocated before us in the static TLS block.
if (s->isPreemptible || ctx.arg.shared)
-ctx.mainPart->relaDyn->addReloc(
-{ctx.target->tlsGotRel, this, offset,
- DynamicReloc::AgainstSymbolWithTargetVA, *s, 0, R_ABS});
+ctx.mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible(
+ctx.target->tlsGotRel, *this, offset, *s, ctx.target->symbolicRel);
}
for (std::pair &p : got.dynTlsSymbols) {
Symbol *s = p.first;
@@ -1160,6 +1159,7 @@ void MipsGotSection::writeTo(uint8_t *buf) {
// if we had to do this.
writeUint(ctx, buf + ctx.arg.wordsize,
(uint64_t)1 << (ctx.arg.wordsize * 8 - 1));
+ ctx.target->relocateAlloc(*this, buf);
for (const FileGot &g : gots) {
auto write = [&](size_t i, const Symbol *s, int64_t a) {
uint64_t va = a;
@@ -1189,9 +1189,10 @@ void MipsGotSection::writeTo(uint8_t *buf) {
write(p.second, p.first, 0);
for (const std::pair &p : g.relocs)
write(p.second, p.first, 0);
-for (const std::pair &p : g.tls)
- write(p.second, p.first,
-p.first->isPreemptible || ctx.arg.shared ? 0 : -0x7000);
+for (const std::pair &p : g.tls) {
+ if (!p.first->isPreemptible && !ctx.arg.shared)
+write(p.second, p.first, -0x7000);
+}
for (const std::pair &p : g.dynTlsSymbols) {
if (p.first == nullptr && !ctx.arg.shared)
write(p.second, nullptr, 1);
@@ -1653,8 +1654,7 @@ int64_t DynamicReloc::computeAddend(Ctx &ctx) const {
case AgainstSymbol:
assert(sym != nullptr);
return addend;
- case AddendOnlyWithTargetVA:
- case AgainstSymbolWithTargetVA: {
+ case AddendOnlyWithTargetVA: {
uint64_t ca = inputSec->getRelocTargetVA(
ctx, Relocation{expr, type, 0, addend, sym}, getOffset());
return ctx.arg.is64 ? ca : SignExtend64<32>(ca);
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 5f01513630597..7612915b5b1dc 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -429,11 +429,6 @@ class DynamicReloc {
/// The resulting dynamic relocation references symbol #sym from the
dynamic
/// symbol table and uses #addend as the value of computeAddend(ctx).
AgainstSymbol,
-/// The resulting dynamic relocation references symbol #sym from the
dynamic
-/// symbol table and uses InputSection::getRelocTargetVA() + #addend for
the
-/// final addend. It can be used for relocations that write the symbol VA
as
-// the addend (e.g. R_MIPS_TLS_TPREL64) but still reference the symbol.
-AgainstSymbolWithTargetVA,
/// This is used by the MIPS multi-GOT implementation. It relocates
/// addresses of 64kb pages that lie inside the output section.
MipsMultiGotPage,
@@ -460,9 +455,7 @@ class DynamicReloc {
uint64_t getOffset() const;
uint32_t getSymIndex(SymbolTableBaseSection *symTab) const;
- bool needsDynSymIndex() const {
-return kind == AgainstSymbol || kind == AgainstSymbolWithTargetVA;
- }
+ bool needsDynSymIndex() const { return kind == AgainstSymbol; }
/// Computes the addend of the dynamic relocation. Note that this is not the
/// same as the #addend member variable as it may also include the symbol
diff --git a/lld/test/ELF/mips-mgot.s b/lld/test/ELF/mips-mgot.s
index 6978b5d9623b4..67bd5e6619f12 100644
--- a/lld/test/ELF/mips-mgot.s
+++ b/lld/test/ELF/mips-mgot.s
@@ -23,7 +23,7 @@
# CHECK: Contents of section .got:
# CHECK-NEXT: 7 8000 [[FOO0]] [[FOO2]]
-# CHECK-NEXT: 70010 0004 0001 0002
+# CHECK-NEXT: 70010 0001 0002
# CHECK-NEXT: 70020 0003 0004 0005 0006
# CHECK-NEXT: 70030
# CHECK-NEXT: 70040
diff --git a/lld/test/ELF/mips-tls-64.s b/lld/test/ELF/mips-tls-64.s
index 3976b50274be4..8a00b93c77e2f 100644
--- a/ll
[llvm-branch-commits] [NFCI][ELF][Mips] Refactor MipsGotSection to avoid explicit writes (PR #150730)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150730 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][ELF][Mips] Refactor MipsGotSection to avoid explicit writes (PR #150730)
https://github.com/jrtc27 updated https://github.com/llvm/llvm-project/pull/150730 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/21.x [ObjCARC] Delete empty autoreleasepools with no autoreleases in them and remove ObjCARCAPElimPass (PR #150771)
https://github.com/AZero13 closed https://github.com/llvm/llvm-project/pull/150771 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)
https://github.com/easyonaadit edited https://github.com/llvm/llvm-project/pull/150170 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)
https://github.com/easyonaadit updated
https://github.com/llvm/llvm-project/pull/150170
>From e9752fc4a85d5156f821c07ecf2b5962843cec99 Mon Sep 17 00:00:00 2001
From: Aaditya
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics
---
a.out| Bin 0 -> 22264 bytes
clang/include/clang/Basic/BuiltinsAMDGPU.def | 25 ++
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 58 +++
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 378 +++
4 files changed, 461 insertions(+)
create mode 100755 a.out
diff --git a/a.out b/a.out
new file mode 100755
index
..2dbcd9ad6edc6908ee25aacddc07417f96ca46f2
GIT binary patch
literal 22264
zcmeHPdvIJ=c|Te|A`)AY^8k^A`V*w8rTI;cD87TFRne<};OQ#6ET60PXl@gLt
zZ$4Ni>d_+`Jyxh{@ObL2z)x^FKcNj5d2&MidOXL6-{qx0e5a)&LK{u_hu3)N<$6u(xGzsH>%`
zBbd(w+qFHqkLR>?`)%3+-DqLDB;67M}
zdtw>dyA_~3u22`DRjI{qCF0(1CDQow(wV~hxdwlqgzYfm-aWNUeQ5z-qkaN@xRf4H
zuNWSX%Dr`EJzLt(0%LI3J%}c1rTP|l+MC$F4zY{WX~R1&|0!a%>bp2H3|_zQ5{0t=
zm*ERnCSS)EV>xbU>VM-C522U=)xaAZfo|e6g7ixKlzOAGz!1Cq=&GKk@l`$R`o24$
zY#S{|)Xuw3&b7y#WE#lmcqVNS8*DBa_d0eWnRZgid*WbY
z@ycRj$?RYz+vB8CJ(+YOIa
zNFhIVz7_k9A-5x%DY%NUwl3SwLXQc&tGxj3Ao3mMPlbS#zczpyezccDa`D3SKc*0|f3%Vlyy
zZZ4S~)s}d=?pQ8!w>{#-?5LA>SsFMbo{pt{>oZsJy-B)p`;Z$-X7aF^k9YLr9DCZ#
zS(f3w8rSu}aXn>TsGX*^Y;oMps2(|U{Yo&dxvl{Es@|5CS~D+>`sj?uV99%qZKNOV
z4m{70kauIz8*bp^txIhk=-=G4d$+wI*x`={H!a}W+Nd`gMHQ!7o7&pb1C>^yQJ*2Z
zJ=h-X^i&bw(4p36vhMm^COY1-G1wMt<&$=ODz#^P{YWysJ{UCrKxt~+Go_9UTfEs!
zo1vrnaMXvJTUg@{`*3sXYW$cFmuEc_j{9)-+ayT)NKE>MOq2GJ_?rlraMtv@*uC@T
zTlC@ne9!rCf4(pK@CE&z4_Z1W+BaQD&DZ*H>7y~B!H1(^D}_cMesKv^%JSi87)znq
zhp#H3O11j%)jmAr!|Qx_*oULxE`^8>Zz!Q?pMCfmAHLg%`=6`BKAe3j31dF|asffJ
zKKu$FzSoDp-iIIX;cxKaAM@eY`0yh>ywQgr_2F;y;Sc+8@A={-9`oT>`Q&R%uX8wJ
zY4q)uiu4a!XcRNK8!cryu7wV4+)~MOEV(BcD>x}@kCQ6I^GJ2$dDp&6KPT+Fojq|p
zkz$2t+;(CyyU^9)%VsIM)`+Jh0?}B@Zll;5G8VesBFhEOVgC
z)LFfjBId&;ve!)Z-Hb_5uK&Ld$2F>6&;L?aZpmRjAA;
zP*2ax%4Dld`WY%GBGj)(lVLN78ot5HPiWR;KO==Q^B&BPddH34@ppKFLXFzI&x{`D
zPc9@s$yfX$>-ZO1PUBwnyROHFSYU*5dcfd9#XWR?3TYN-_qN`xgIIPy_!VzDC+JOK
z=0EX6b?Ku)XEd9$v2>Mo)#rjvZZ!YN`@a&zjAkyL&)fN|6OBJIn#zngsm}*9i9|k5
zu{_q=Y99^iiBbD5OqG7w6R}4Mi9|eiP(*7z(dB;8V<^qL6J*|SOgqR*M9RnoZ<~%2&SC$Xu%ncAKQ_Qr+Wshp2td%
zy(gZ_V?FS}fSL-9JA3U!%5m+xGr7A=_OS2|%W>*yEIpoBWFq)m!E7$M2a8dqg854Q
zXfR*$_$7~dOg@`TrOZY6d@#Rnd?b_l*XtI^C3shCDp*J-6Pet&E~!0V)S9mc3;B57
zj_pHvvA&39)#zPM;WCu64m{>D{;a1Ji;(axN8MeIvwQ>eTeYobn
z>A>pyrted#Y5&Z}Ppk|C9t;EmA65sl&wGvhNARk~fu-M)50>h{QXM#99w%zbbc|$;
zUe=c79fC0vk+uC{HZ}i=dndlGbY*|ruG%WKlwA%5~MzD1>=~~VC
z{Me1_*CRSs7-@-*7uG|%Wgs~^=BDv7O-p|oZ#UshCjELxOE#DJKs@SNR(ofAsNL#{
zb%r`)p^fc{k=TZ4=cbOBv(ecQZQbNVH;uHn$Kwe+Z8j#l)~T}W+e_Ia^
zQNKjOgJczCAzRxzHncmf8#jhF#yVQNHjZ?3wRc9_V~J2G)IQSMk#LZ>A-Zv$8c3$m
zlf2Oy80fXybuL!(Yn+AOvkT&UZY%YRx$=(2+YGLW?=WO=8J0-WM+`2!(63d&zh4Dk
ztqb&vInfu|T?Nln!T+iX{>>`*vsLhS(lLQrCF3V4&krD9ssHuAz$^8?EZC{k|5Cwp
z#*M2WI+*CLsvuUCgJVv9OqnBbH>ZNROw4f|6~v2Ku9jwZ6mUG1%h*{AGx!o&a3r5s
zL2sB6jOINJ!?MGQ{&;e{Fb)}1TaE%0`^c^{0y>6lyqHH9?Wm;-<2Jj&7bM8R(S-<(
z;H^W9N-jj&H#G2uF^lv(y^j2zXmrtX=pMfwx8$REyD>Vps0OYWhX}eHC_V=X7o?Mm
zM+i=im=-1w%~qWn3sU>=#D7oL>)&
z`ttmj@us{4Sek!=j1FZ5=mbbz88=EhD)T-vFC^Un@t=&-_<0C1(LZkHvt*uXP#gsr
zhO+F{Hed_%WnRm4V9Mpo`JEMPG9SsGaepY+caT_)QT{^p_Za*l75?c3A3-PaOL`2
z2Kyr#Yp}$mrZrbBdq)g0Ud9=WOkVc&E<%W>w^_1wnNLN`N
zI7=}IT;3CIvAkMWBC0V>6g&io9i07>l%CO4%8f6_Q!$1f7E@frnl}$O+SFZx9S>^
z{pPybZgAeW20wwy8Jv*x*?Dd+6U}qg?j8rr5
zUn6oI@ojYtyNrAocn9p@5-bS+U4P9hZ9nUu`1YNVi62J}{^Z=?uD-UJwr3-UZ)$_e
z+!fz%RO*+BVEwiCYdy-Jd~QD67MXbAr4=u%LydC-FTVBZ72jhtGP&aG(2E>=S-F?N
z)Vuz3ZZ2}T^EA?muPj&U{U^nqr~fmfzv<}}KQ{XJivFs|RjBW=E+@6t&z!rrpDgM-X?1RiQyBwS6gb%ZK}nuvG_qs
z9y~R6a^5?MBi*+Tp$KCqkfw?@B_>XuR#9+~)4ffoB#0o}+G#DY9mhj6^@k3^)sacS
zPZEwy{O}0?s(7f-$4H_*TM0|RfNmPqWoGv;v
zI=SLvT>HpmxEPr{+M2giQaB)YF0_7J$1}1(*qh}B;89fOG
z#;5t5o#oecKBJ#Unl^f#w^t(tMjrw(M(;(mWHb$iM)?$bF;d(Q)@OE5r=HYhEp7k<
zO9L9y=iG*9$x@TXPfu@xrQ&J$hmN=zpsuDxK2GYd?S%p>^lSnG3C-i?XvPh_8f|st
z&|Km26H}e=_U)dv$i#|Yu7%>eXX@9WX4bD+-Zafy?#aVAJi$Zv)Eok9Jj-`tc)()4
zYo5hwke459KL!)9?OrihyoPp9ulREy3=pmLH(qEm22V7gP8L6h0-oTvZPR3*oXJ{h
z_v~?iZ0{nISFC74J$Y&NFMvGvoW>6Rv=)`~Z&8mUhi2Ts+!<}$lV=*ULY8x#-+K#=
zYoEul{@O5NxMs}F1bYvlQe
zn?0iWFOp|hZ}x!Z|C&6z-?LfGf0{fSqS;~1KS=&{;d
z&CY0kGkG=_v&S`mBY8G$vkz;&ojjgwIDgH*jXZh|IDgGwN1m%QvsukwNgmG-oWJHT
zCC}l(>>&6vTP;M<7eUR^!O(_?$IG`qS1t3BdEprN4I_i_{D#rb_m#=>n?`>+ziDLP
ziTDFA97E`0OH=&>LF}03=GR4dLJYDzeb&euvnp#MrWooO)Nlsk1g4gZ(OGCv~&vlSPXKWtY{3U%ynQz
zV>oO0a~KnnKeNIjQMiPfZ!r0z#%Z26%xe+YGBC*Z@tW}+R&KL2_j*IGH?+&pEr#B1
zXu{AvhCX2ELxw(L=;MYyZRkH4`tOFmWa!H6CVxY(H?+&pEr#B1Xu{AvhCX2ELxys(
zeQExe^V_e<6tl0UER3?<;I~|-%zfuwMRe_v5D3((8PhJw{m1)qIp&KOT2YD(UjXmP
zNIMoX0v2cuzF405AY=3G6w_-k;tX|JgiGNT#3=uakq?`c_k+^Dw1<>q7{!nBwRn6o
zNIUq4I(}K{{+IjuevMzQ=)ty1j1Fi88j4mj2Qf!5;xm|5#=1
zk_sldh?@1*r-I#^`&(QGD@^`%Dzh-_Em|4I##+_<8dhobDxPwvsu!%BU^a!7Ct1*&x0k7
zyuAgp(G0U*xN7FDlOe^kp%>9EV1dlGospzXuS9e12zE(G2eD&>9n0vQB<5p5TLVeG
zvj*R8>WkQ0c6M*;vj_TzcG=vK!{|UVGs3lM57OtQ6B$DyNXdvkbqyu;a!oLr&0@EQ
zflctCx8p{CvahWboZe=_b_WM{+2hLIIxzG;mP(ticz0ywcPYzTqQ33DY>i
[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 3 (PR #151310)
easyonaadit wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/151310?utm_source=stack-comment-downstack-mergeability-warning). Learn more: https://graphite.dev/docs/merge-pull-requests * **#150395** (https://app.graphite.dev/github/pr/llvm/llvm-project/150395?utm_source=stack-comment-icon) * **#150170** (https://app.graphite.dev/github/pr/llvm/llvm-project/150170?utm_source=stack-comment-icon) * **#151310** (https://app.graphite.dev/github/pr/llvm/llvm-project/151310?utm_source=stack-comment-icon) 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/151310?utm_source=stack-comment-view-in-graphite) * **#151309** (https://app.graphite.dev/github/pr/llvm/llvm-project/151309?utm_source=stack-comment-icon) * **#150169** (https://app.graphite.dev/github/pr/llvm/llvm-project/150169?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn more about stacking: https://stacking.dev/?utm_source=stack-comment https://github.com/llvm/llvm-project/pull/151310 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 2 (PR #151309)
easyonaadit wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/151309?utm_source=stack-comment-downstack-mergeability-warning). Learn more: https://graphite.dev/docs/merge-pull-requests * **#150395** (https://app.graphite.dev/github/pr/llvm/llvm-project/150395?utm_source=stack-comment-icon) * **#150170** (https://app.graphite.dev/github/pr/llvm/llvm-project/150170?utm_source=stack-comment-icon) * **#151310** (https://app.graphite.dev/github/pr/llvm/llvm-project/151310?utm_source=stack-comment-icon) * **#151309** (https://app.graphite.dev/github/pr/llvm/llvm-project/151309?utm_source=stack-comment-icon) 👈 (View in Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/151309?utm_source=stack-comment-view-in-graphite) * **#150169** (https://app.graphite.dev/github/pr/llvm/llvm-project/150169?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by Graphite (https://graphite.dev?utm-source=stack-comment). Learn more about stacking: https://stacking.dev/?utm_source=stack-comment https://github.com/llvm/llvm-project/pull/151309 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [AArch64] Keep floating-point conversion in SIMD (#147707) (PR #151317)
paulwalker-arm wrote: > @paulwalker-arm What do you think about merging this PR to the release branch? As far as I know the PR is not fixing an existing bug or performance regression so it depends if we've passed the point of accepting new optimisations. @guy-david will need to argue the case for the PR's importance based on their need. The PR itself is specific to AArch64 so there is no danger to other targets. That said, the PR has just landed so it seems prudent to wait for wider buildbot testing to complete before pulling into a release. https://github.com/llvm/llvm-project/pull/151317 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Use GetOmpDirectiveName to find directive source loca… (PR #150955)
https://github.com/tblah approved this pull request. https://github.com/llvm/llvm-project/pull/150955 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [llvm] Write out raw profile bytes in little endian. (PR #150375)
https://github.com/teresajohnson approved this pull request. lgtm but suggest updating the title and/or description to note this is for MemProf. https://github.com/llvm/llvm-project/pull/150375 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)
https://github.com/cjacek approved this pull request. https://github.com/llvm/llvm-project/pull/151307 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy] [COFF] Ignore associative sections in executables (#151143) (PR #151336)
https://github.com/cjacek approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/151336 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/21.x: [Analysis] Prevent revisiting block when searching for noreturn vars (#150582) (PR #151381)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/151381
Backport 330b40e11fd20e9a29b9c24de17e4ba23afeedc6
Requested by: @pawosm-arm
>From f0b310b07873d3a21ab8f9f0ca137e876fc0f4ec Mon Sep 17 00:00:00 2001
From: Serge Pavlov
Date: Wed, 30 Jul 2025 20:40:07 +0700
Subject: [PATCH] [Analysis] Prevent revisiting block when searching for
noreturn vars (#150582)
When searching for noreturn variable initializations, do not revisit CFG
blocks that have already been visited; this prevents the analysis from hanging.
This should fix https://github.com/llvm/llvm-project/issues/150336.
(cherry picked from commit 330b40e11fd20e9a29b9c24de17e4ba23afeedc6)
---
clang/lib/Sema/AnalysisBasedWarnings.cpp | 4
clang/test/SemaCXX/noreturn-weverything.c | 15 +++
2 files changed, 19 insertions(+)
create mode 100644 clang/test/SemaCXX/noreturn-weverything.c
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 5e75c64eb2b9a..85ac3c06ec2c2 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -503,8 +503,12 @@ static bool areAllValuesNoReturn(const VarDecl *VD, const
CFGBlock &VarBlk,
TransferFunctions TF(VD);
BackwardDataflowWorklist Worklist(*AC.getCFG(), AC);
+ llvm::DenseSet Visited;
Worklist.enqueueBlock(&VarBlk);
while (const CFGBlock *B = Worklist.dequeue()) {
+if (Visited.contains(B))
+ continue;
+Visited.insert(B);
// First check the current block.
for (CFGBlock::const_reverse_iterator ri = B->rbegin(), re = B->rend();
ri != re; ++ri) {
diff --git a/clang/test/SemaCXX/noreturn-weverything.c
b/clang/test/SemaCXX/noreturn-weverything.c
new file mode 100644
index 0..92a587d395639
--- /dev/null
+++ b/clang/test/SemaCXX/noreturn-weverything.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -fsyntax-only %s -Weverything
+
+void free(void *);
+typedef void (*set_free_func)(void *);
+struct Method {
+ int nparams;
+ int *param;
+};
+void selelem_free_method(struct Method* method, void* data) {
+set_free_func free_func = 0;
+for (int i = 0; i < method->nparams; ++i)
+free(&method->param[i]);
+if (data && free_func)
+free_func(data);
+}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)
https://github.com/mstorsjo approved this pull request. https://github.com/llvm/llvm-project/pull/151307 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)
https://github.com/easyonaadit updated
https://github.com/llvm/llvm-project/pull/150170
>From bc06c18461bd36dc8c732d04bc6fab6ebaa4c0d5 Mon Sep 17 00:00:00 2001
From: Aaditya
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 25 ++
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 58 +++
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 378 +++
3 files changed, 461 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 878543566f0e3..b91b32457ff86 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
+//===--===//
+
+// Wave Reduction builtins.
+
+//===--===//
+
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
+
//===--===//
// R600-NI only builtins.
//===--===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 7dccf82b1a7a3..c3d9ec5fc8309 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void
CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ default:
+llvm_unreachable("Unknown BuiltinID for wave reduction");
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+return Intrinsic::amdgcn_wave_reduce_add;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+return Intrinsic::amdgcn_wave_reduce_sub;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+return Intrinsic::amdgcn_wave_reduce_min;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+return Intrinsic::amdgcn_wave_reduce_umin;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+return Intrinsic::amdgcn_wave_reduce_max;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+return Intrinsic::amdgcn_wave_reduce_umax;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+return Intrinsic::amdgcn_wave_reduce_and;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+return Intrinsic::amdgcn_wave_reduce_or;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+return Intrinsic::amdgcn_wave_reduce_xor;
+ }
+}
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
llvm::SyncScope::ID SSID;
switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u
[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)
https://github.com/easyonaadit updated
https://github.com/llvm/llvm-project/pull/150170
>From bc06c18461bd36dc8c732d04bc6fab6ebaa4c0d5 Mon Sep 17 00:00:00 2001
From: Aaditya
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 25 ++
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 58 +++
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 378 +++
3 files changed, 461 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 878543566f0e3..b91b32457ff86 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
+//===--===//
+
+// Wave Reduction builtins.
+
+//===--===//
+
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
+
//===--===//
// R600-NI only builtins.
//===--===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 7dccf82b1a7a3..c3d9ec5fc8309 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void
CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ default:
+llvm_unreachable("Unknown BuiltinID for wave reduction");
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+return Intrinsic::amdgcn_wave_reduce_add;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+return Intrinsic::amdgcn_wave_reduce_sub;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+return Intrinsic::amdgcn_wave_reduce_min;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+return Intrinsic::amdgcn_wave_reduce_umin;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+return Intrinsic::amdgcn_wave_reduce_max;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+return Intrinsic::amdgcn_wave_reduce_umax;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+return Intrinsic::amdgcn_wave_reduce_and;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+return Intrinsic::amdgcn_wave_reduce_or;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+return Intrinsic::amdgcn_wave_reduce_xor;
+ }
+}
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
llvm::SyncScope::ID SSID;
switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u
[llvm-branch-commits] [flang] [flang][OpenMP] Store directive information in OpenMPSectionConstruct (PR #150804)
kparzysz wrote: Ping https://github.com/llvm/llvm-project/pull/150804 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [MemProf] Fix FileCheck prefix in the histogram test. (PR #150506)
https://github.com/snehasish edited https://github.com/llvm/llvm-project/pull/150506 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Fix FileCheck prefix in the histogram test. (PR #150506)
https://github.com/snehasish updated
https://github.com/llvm/llvm-project/pull/150506
>From 66b78c5ea8b8e016557edf98388c03e34c51b8ce Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Thu, 24 Jul 2025 06:25:00 +0000
Subject: [PATCH] Fix FileCheck prefix in the histogram test.
---
.../memprof-padding-histogram.test| 152 +-
1 file changed, 76 insertions(+), 76 deletions(-)
diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: -
CHECK: Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT Callstack:
+CHECK-NEXT -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce
[llvm-branch-commits] [llvm] Fix FileCheck prefix in the histogram test. (PR #150506)
https://github.com/snehasish updated
https://github.com/llvm/llvm-project/pull/150506
>From 66b78c5ea8b8e016557edf98388c03e34c51b8ce Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Thu, 24 Jul 2025 06:25:00 +0000
Subject: [PATCH] Fix FileCheck prefix in the histogram test.
---
.../memprof-padding-histogram.test| 152 +-
1 file changed, 76 insertions(+), 76 deletions(-)
diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: -
CHECK: Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT Callstack:
+CHECK-NEXT -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce
[llvm-branch-commits] [compiler-rt] [llvm] Write out raw profile bytes in little endian. (PR #150375)
https://github.com/snehasish updated
https://github.com/llvm/llvm-project/pull/150375
>From 733f040d1e113d71a328bacce5dc5abcc61a9258 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Thu, 24 Jul 2025 06:10:55 +
Subject: [PATCH 1/3] Write out raw profile bytes in little endian.
Instead of writing out in native endian, write out the raw profile bytes
in little endian. Also update the MIB data in little endian. Also clean
up some lint and unused includes in rawprofile.cpp.
---
.../lib/memprof/memprof_rawprofile.cpp| 17 +
llvm/lib/ProfileData/MemProfReader.cpp| 35 +--
2 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index f909d78f5f36a..fbcfee3d655eb 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -7,10 +7,7 @@
#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_array_ref.h"
#include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_linux.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
-#include "sanitizer_common/sanitizer_stackdepotbase.h"
#include "sanitizer_common/sanitizer_stacktrace.h"
#include "sanitizer_common/sanitizer_vector.h"
@@ -23,7 +20,16 @@ using ::llvm::memprof::encodeHistogramCount;
namespace {
template char *WriteBytes(const T &Pod, char *Buffer) {
- *(T *)Buffer = Pod;
+ static_assert(is_trivially_copyable::value, "T must be POD");
+ const uint8_t *Src = reinterpret_cast(&Pod);
+ for (size_t I = 0; I < sizeof(T); ++I) {
+Buffer[I] = Src[I];
+ }
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (size_t i = 0; i < sizeof(T) / 2; ++i) {
+std::swap(buffer[i], buffer[sizeof(T) - 1 - i]);
+ }
+#endif
return Buffer + sizeof(T);
}
@@ -33,7 +39,6 @@ void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock
*const &MIB,
auto *StackIds = reinterpret_cast *>(Arg);
StackIds->PushBack(Key);
}
-} // namespace
u64 SegmentSizeBytes(ArrayRef Modules) {
u64 NumSegmentsToRecord = 0;
@@ -184,6 +189,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const
Vector &StackIds,
CHECK(ExpectedNumBytes >= static_cast(Ptr - Buffer) &&
"Expected num bytes != actual bytes written");
}
+} // namespace
// Format
// -- Header
@@ -288,5 +294,4 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap,
ArrayRef Modules,
return TotalSizeBytes;
}
-
} // namespace __memprof
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp
b/llvm/lib/ProfileData/MemProfReader.cpp
index 9db699712d6f3..3fc0dbfd8e69d 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -146,8 +146,39 @@ readMemInfoBlocksCommon(const char *Ptr, bool
IsHistogramEncoded = false) {
const uint64_t Id =
endian::readNext(Ptr);
-MemInfoBlock MIB = *reinterpret_cast(Ptr);
-Ptr += sizeof(MemInfoBlock);
+MemInfoBlock MIB;
+#define READ_MIB_FIELD(FIELD)
\
+ MIB.FIELD = endian::readNext(Ptr)
+
+READ_MIB_FIELD(AllocCount);
+READ_MIB_FIELD(TotalAccessCount);
+READ_MIB_FIELD(MinAccessCount);
+READ_MIB_FIELD(MaxAccessCount);
+READ_MIB_FIELD(TotalSize);
+READ_MIB_FIELD(MinSize);
+READ_MIB_FIELD(MaxSize);
+READ_MIB_FIELD(AllocTimestamp);
+READ_MIB_FIELD(DeallocTimestamp);
+READ_MIB_FIELD(TotalLifetime);
+READ_MIB_FIELD(MinLifetime);
+READ_MIB_FIELD(MaxLifetime);
+READ_MIB_FIELD(AllocCpuId);
+READ_MIB_FIELD(DeallocCpuId);
+READ_MIB_FIELD(NumMigratedCpu);
+READ_MIB_FIELD(NumLifetimeOverlaps);
+READ_MIB_FIELD(NumSameAllocCpu);
+READ_MIB_FIELD(NumSameDeallocCpu);
+READ_MIB_FIELD(DataTypeId);
+READ_MIB_FIELD(TotalAccessDensity);
+READ_MIB_FIELD(MinAccessDensity);
+READ_MIB_FIELD(MaxAccessDensity);
+READ_MIB_FIELD(TotalLifetimeAccessDensity);
+READ_MIB_FIELD(MinLifetimeAccessDensity);
+READ_MIB_FIELD(MaxLifetimeAccessDensity);
+READ_MIB_FIELD(AccessHistogramSize);
+READ_MIB_FIELD(AccessHistogram);
+#undef READ_MIB_FIELD
if (MIB.AccessHistogramSize > 0) {
// The in-memory representation uses uint64_t for histogram entries.
>From 4f3fc5e1eef921d5fa0058cf91a2c139ba7af249 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Wed, 30 Jul 2025 00:35:20 +
Subject: [PATCH 2/3] Address comment
---
compiler-rt/lib/memprof/memprof_rawprofile.cpp | 12 ++--
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index fbcfee3d655eb..bf04afa679c9c 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -22,13 +22,13 @@ namespace
[llvm-branch-commits] [compiler-rt] [llvm] Write out raw profile bytes in little endian. (PR #150375)
https://github.com/snehasish updated
https://github.com/llvm/llvm-project/pull/150375
>From 733f040d1e113d71a328bacce5dc5abcc61a9258 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Thu, 24 Jul 2025 06:10:55 +
Subject: [PATCH 1/3] Write out raw profile bytes in little endian.
Instead of writing out in native endian, write out the raw profile bytes
in little endian. Also update the MIB data in little endian. Also clean
up some lint and unused includes in rawprofile.cpp.
---
.../lib/memprof/memprof_rawprofile.cpp| 17 +
llvm/lib/ProfileData/MemProfReader.cpp| 35 +--
2 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index f909d78f5f36a..fbcfee3d655eb 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -7,10 +7,7 @@
#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_array_ref.h"
#include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_linux.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
-#include "sanitizer_common/sanitizer_stackdepotbase.h"
#include "sanitizer_common/sanitizer_stacktrace.h"
#include "sanitizer_common/sanitizer_vector.h"
@@ -23,7 +20,16 @@ using ::llvm::memprof::encodeHistogramCount;
namespace {
template char *WriteBytes(const T &Pod, char *Buffer) {
- *(T *)Buffer = Pod;
+ static_assert(is_trivially_copyable::value, "T must be POD");
+ const uint8_t *Src = reinterpret_cast(&Pod);
+ for (size_t I = 0; I < sizeof(T); ++I) {
+Buffer[I] = Src[I];
+ }
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (size_t i = 0; i < sizeof(T) / 2; ++i) {
+std::swap(buffer[i], buffer[sizeof(T) - 1 - i]);
+ }
+#endif
return Buffer + sizeof(T);
}
@@ -33,7 +39,6 @@ void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock
*const &MIB,
auto *StackIds = reinterpret_cast *>(Arg);
StackIds->PushBack(Key);
}
-} // namespace
u64 SegmentSizeBytes(ArrayRef Modules) {
u64 NumSegmentsToRecord = 0;
@@ -184,6 +189,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const
Vector &StackIds,
CHECK(ExpectedNumBytes >= static_cast(Ptr - Buffer) &&
"Expected num bytes != actual bytes written");
}
+} // namespace
// Format
// -- Header
@@ -288,5 +294,4 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap,
ArrayRef Modules,
return TotalSizeBytes;
}
-
} // namespace __memprof
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp
b/llvm/lib/ProfileData/MemProfReader.cpp
index 9db699712d6f3..3fc0dbfd8e69d 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -146,8 +146,39 @@ readMemInfoBlocksCommon(const char *Ptr, bool
IsHistogramEncoded = false) {
const uint64_t Id =
endian::readNext(Ptr);
-MemInfoBlock MIB = *reinterpret_cast(Ptr);
-Ptr += sizeof(MemInfoBlock);
+MemInfoBlock MIB;
+#define READ_MIB_FIELD(FIELD)
\
+ MIB.FIELD = endian::readNext(Ptr)
+
+READ_MIB_FIELD(AllocCount);
+READ_MIB_FIELD(TotalAccessCount);
+READ_MIB_FIELD(MinAccessCount);
+READ_MIB_FIELD(MaxAccessCount);
+READ_MIB_FIELD(TotalSize);
+READ_MIB_FIELD(MinSize);
+READ_MIB_FIELD(MaxSize);
+READ_MIB_FIELD(AllocTimestamp);
+READ_MIB_FIELD(DeallocTimestamp);
+READ_MIB_FIELD(TotalLifetime);
+READ_MIB_FIELD(MinLifetime);
+READ_MIB_FIELD(MaxLifetime);
+READ_MIB_FIELD(AllocCpuId);
+READ_MIB_FIELD(DeallocCpuId);
+READ_MIB_FIELD(NumMigratedCpu);
+READ_MIB_FIELD(NumLifetimeOverlaps);
+READ_MIB_FIELD(NumSameAllocCpu);
+READ_MIB_FIELD(NumSameDeallocCpu);
+READ_MIB_FIELD(DataTypeId);
+READ_MIB_FIELD(TotalAccessDensity);
+READ_MIB_FIELD(MinAccessDensity);
+READ_MIB_FIELD(MaxAccessDensity);
+READ_MIB_FIELD(TotalLifetimeAccessDensity);
+READ_MIB_FIELD(MinLifetimeAccessDensity);
+READ_MIB_FIELD(MaxLifetimeAccessDensity);
+READ_MIB_FIELD(AccessHistogramSize);
+READ_MIB_FIELD(AccessHistogram);
+#undef READ_MIB_FIELD
if (MIB.AccessHistogramSize > 0) {
// The in-memory representation uses uint64_t for histogram entries.
>From 4f3fc5e1eef921d5fa0058cf91a2c139ba7af249 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Wed, 30 Jul 2025 00:35:20 +
Subject: [PATCH 2/3] Address comment
---
compiler-rt/lib/memprof/memprof_rawprofile.cpp | 12 ++--
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index fbcfee3d655eb..bf04afa679c9c 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -22,13 +22,13 @@ namespace
[llvm-branch-commits] [compiler-rt] [llvm] [MemProf] Write out raw profile bytes in little endian. (PR #150375)
https://github.com/snehasish edited
https://github.com/llvm/llvm-project/pull/150375
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [llvm] [MemProf] Write out raw profile bytes in little endian. (PR #150375)
https://github.com/snehasish updated
https://github.com/llvm/llvm-project/pull/150375
>From 090331353b22e4b97244f25166a0801ddecbef55 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Thu, 24 Jul 2025 06:10:55 +
Subject: [PATCH 1/3] Write out raw profile bytes in little endian.
Instead of writing out in native endian, write out the raw profile bytes
in little endian. Also update the MIB data in little endian. Also clean
up some lint and unused includes in rawprofile.cpp.
---
.../lib/memprof/memprof_rawprofile.cpp| 17 +
llvm/lib/ProfileData/MemProfReader.cpp| 35 +--
2 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index f909d78f5f36a..fbcfee3d655eb 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -7,10 +7,7 @@
#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_array_ref.h"
#include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_linux.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
-#include "sanitizer_common/sanitizer_stackdepotbase.h"
#include "sanitizer_common/sanitizer_stacktrace.h"
#include "sanitizer_common/sanitizer_vector.h"
@@ -23,7 +20,16 @@ using ::llvm::memprof::encodeHistogramCount;
namespace {
template char *WriteBytes(const T &Pod, char *Buffer) {
- *(T *)Buffer = Pod;
+ static_assert(is_trivially_copyable::value, "T must be POD");
+ const uint8_t *Src = reinterpret_cast(&Pod);
+ for (size_t I = 0; I < sizeof(T); ++I) {
+Buffer[I] = Src[I];
+ }
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (size_t i = 0; i < sizeof(T) / 2; ++i) {
+std::swap(buffer[i], buffer[sizeof(T) - 1 - i]);
+ }
+#endif
return Buffer + sizeof(T);
}
@@ -33,7 +39,6 @@ void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock
*const &MIB,
auto *StackIds = reinterpret_cast *>(Arg);
StackIds->PushBack(Key);
}
-} // namespace
u64 SegmentSizeBytes(ArrayRef Modules) {
u64 NumSegmentsToRecord = 0;
@@ -184,6 +189,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const
Vector &StackIds,
CHECK(ExpectedNumBytes >= static_cast(Ptr - Buffer) &&
"Expected num bytes != actual bytes written");
}
+} // namespace
// Format
// -- Header
@@ -288,5 +294,4 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap,
ArrayRef Modules,
return TotalSizeBytes;
}
-
} // namespace __memprof
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp
b/llvm/lib/ProfileData/MemProfReader.cpp
index 9db699712d6f3..3fc0dbfd8e69d 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -146,8 +146,39 @@ readMemInfoBlocksCommon(const char *Ptr, bool
IsHistogramEncoded = false) {
const uint64_t Id =
endian::readNext(Ptr);
-MemInfoBlock MIB = *reinterpret_cast(Ptr);
-Ptr += sizeof(MemInfoBlock);
+MemInfoBlock MIB;
+#define READ_MIB_FIELD(FIELD)
\
+ MIB.FIELD = endian::readNext(Ptr)
+
+READ_MIB_FIELD(AllocCount);
+READ_MIB_FIELD(TotalAccessCount);
+READ_MIB_FIELD(MinAccessCount);
+READ_MIB_FIELD(MaxAccessCount);
+READ_MIB_FIELD(TotalSize);
+READ_MIB_FIELD(MinSize);
+READ_MIB_FIELD(MaxSize);
+READ_MIB_FIELD(AllocTimestamp);
+READ_MIB_FIELD(DeallocTimestamp);
+READ_MIB_FIELD(TotalLifetime);
+READ_MIB_FIELD(MinLifetime);
+READ_MIB_FIELD(MaxLifetime);
+READ_MIB_FIELD(AllocCpuId);
+READ_MIB_FIELD(DeallocCpuId);
+READ_MIB_FIELD(NumMigratedCpu);
+READ_MIB_FIELD(NumLifetimeOverlaps);
+READ_MIB_FIELD(NumSameAllocCpu);
+READ_MIB_FIELD(NumSameDeallocCpu);
+READ_MIB_FIELD(DataTypeId);
+READ_MIB_FIELD(TotalAccessDensity);
+READ_MIB_FIELD(MinAccessDensity);
+READ_MIB_FIELD(MaxAccessDensity);
+READ_MIB_FIELD(TotalLifetimeAccessDensity);
+READ_MIB_FIELD(MinLifetimeAccessDensity);
+READ_MIB_FIELD(MaxLifetimeAccessDensity);
+READ_MIB_FIELD(AccessHistogramSize);
+READ_MIB_FIELD(AccessHistogram);
+#undef READ_MIB_FIELD
if (MIB.AccessHistogramSize > 0) {
// The in-memory representation uses uint64_t for histogram entries.
>From 83336c8e84e24409acef3c5c988ad7c2e039452d Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Wed, 30 Jul 2025 00:35:20 +
Subject: [PATCH 2/3] Address comment
---
compiler-rt/lib/memprof/memprof_rawprofile.cpp | 12 ++--
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index fbcfee3d655eb..bf04afa679c9c 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -22,13 +22,13 @@ namespace
[llvm-branch-commits] [compiler-rt] [llvm] [MemProf] Write out raw profile bytes in little endian. (PR #150375)
https://github.com/snehasish updated
https://github.com/llvm/llvm-project/pull/150375
>From 090331353b22e4b97244f25166a0801ddecbef55 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Thu, 24 Jul 2025 06:10:55 +
Subject: [PATCH 1/3] Write out raw profile bytes in little endian.
Instead of writing out in native endian, write out the raw profile bytes
in little endian. Also update the MIB data in little endian. Also clean
up some lint and unused includes in rawprofile.cpp.
---
.../lib/memprof/memprof_rawprofile.cpp| 17 +
llvm/lib/ProfileData/MemProfReader.cpp| 35 +--
2 files changed, 44 insertions(+), 8 deletions(-)
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index f909d78f5f36a..fbcfee3d655eb 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -7,10 +7,7 @@
#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_array_ref.h"
#include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_linux.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
-#include "sanitizer_common/sanitizer_stackdepotbase.h"
#include "sanitizer_common/sanitizer_stacktrace.h"
#include "sanitizer_common/sanitizer_vector.h"
@@ -23,7 +20,16 @@ using ::llvm::memprof::encodeHistogramCount;
namespace {
template char *WriteBytes(const T &Pod, char *Buffer) {
- *(T *)Buffer = Pod;
+ static_assert(is_trivially_copyable::value, "T must be POD");
+ const uint8_t *Src = reinterpret_cast(&Pod);
+ for (size_t I = 0; I < sizeof(T); ++I) {
+Buffer[I] = Src[I];
+ }
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (size_t i = 0; i < sizeof(T) / 2; ++i) {
+std::swap(buffer[i], buffer[sizeof(T) - 1 - i]);
+ }
+#endif
return Buffer + sizeof(T);
}
@@ -33,7 +39,6 @@ void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock
*const &MIB,
auto *StackIds = reinterpret_cast *>(Arg);
StackIds->PushBack(Key);
}
-} // namespace
u64 SegmentSizeBytes(ArrayRef Modules) {
u64 NumSegmentsToRecord = 0;
@@ -184,6 +189,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const
Vector &StackIds,
CHECK(ExpectedNumBytes >= static_cast(Ptr - Buffer) &&
"Expected num bytes != actual bytes written");
}
+} // namespace
// Format
// -- Header
@@ -288,5 +294,4 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap,
ArrayRef Modules,
return TotalSizeBytes;
}
-
} // namespace __memprof
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp
b/llvm/lib/ProfileData/MemProfReader.cpp
index 9db699712d6f3..3fc0dbfd8e69d 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -146,8 +146,39 @@ readMemInfoBlocksCommon(const char *Ptr, bool
IsHistogramEncoded = false) {
const uint64_t Id =
endian::readNext(Ptr);
-MemInfoBlock MIB = *reinterpret_cast(Ptr);
-Ptr += sizeof(MemInfoBlock);
+MemInfoBlock MIB;
+#define READ_MIB_FIELD(FIELD)
\
+ MIB.FIELD = endian::readNext(Ptr)
+
+READ_MIB_FIELD(AllocCount);
+READ_MIB_FIELD(TotalAccessCount);
+READ_MIB_FIELD(MinAccessCount);
+READ_MIB_FIELD(MaxAccessCount);
+READ_MIB_FIELD(TotalSize);
+READ_MIB_FIELD(MinSize);
+READ_MIB_FIELD(MaxSize);
+READ_MIB_FIELD(AllocTimestamp);
+READ_MIB_FIELD(DeallocTimestamp);
+READ_MIB_FIELD(TotalLifetime);
+READ_MIB_FIELD(MinLifetime);
+READ_MIB_FIELD(MaxLifetime);
+READ_MIB_FIELD(AllocCpuId);
+READ_MIB_FIELD(DeallocCpuId);
+READ_MIB_FIELD(NumMigratedCpu);
+READ_MIB_FIELD(NumLifetimeOverlaps);
+READ_MIB_FIELD(NumSameAllocCpu);
+READ_MIB_FIELD(NumSameDeallocCpu);
+READ_MIB_FIELD(DataTypeId);
+READ_MIB_FIELD(TotalAccessDensity);
+READ_MIB_FIELD(MinAccessDensity);
+READ_MIB_FIELD(MaxAccessDensity);
+READ_MIB_FIELD(TotalLifetimeAccessDensity);
+READ_MIB_FIELD(MinLifetimeAccessDensity);
+READ_MIB_FIELD(MaxLifetimeAccessDensity);
+READ_MIB_FIELD(AccessHistogramSize);
+READ_MIB_FIELD(AccessHistogram);
+#undef READ_MIB_FIELD
if (MIB.AccessHistogramSize > 0) {
// The in-memory representation uses uint64_t for histogram entries.
>From 83336c8e84e24409acef3c5c988ad7c2e039452d Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Wed, 30 Jul 2025 00:35:20 +
Subject: [PATCH 2/3] Address comment
---
compiler-rt/lib/memprof/memprof_rawprofile.cpp | 12 ++--
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index fbcfee3d655eb..bf04afa679c9c 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -22,13 +22,13 @@ namespace
[llvm-branch-commits] [llvm] [MemProf] Fix FileCheck prefix in the histogram test. (PR #150506)
https://github.com/snehasish updated
https://github.com/llvm/llvm-project/pull/150506
>From 9305bff0674d2c6cd7522af4647daef645ee2f85 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Thu, 24 Jul 2025 06:25:00 +
Subject: [PATCH] Fix FileCheck prefix in the histogram test.
---
.../memprof-padding-histogram.test| 152 +-
1 file changed, 76 insertions(+), 76 deletions(-)
diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: -
CHECK: Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT Callstack:
+CHECK-NEXT -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce
[llvm-branch-commits] [llvm] [MemProf] Fix FileCheck prefix in the histogram test. (PR #150506)
https://github.com/snehasish updated
https://github.com/llvm/llvm-project/pull/150506
>From 9305bff0674d2c6cd7522af4647daef645ee2f85 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Thu, 24 Jul 2025 06:25:00 +
Subject: [PATCH] Fix FileCheck prefix in the histogram test.
---
.../memprof-padding-histogram.test| 152 +-
1 file changed, 76 insertions(+), 76 deletions(-)
diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: -
CHECK: Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT Callstack:
+CHECK-NEXT -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce
[llvm-branch-commits] [clang] release/21.x: [clang-format] Disable IntegerLiteralSeparator for C++ before c++14 (#151273) (PR #151362)
llvmbot wrote:
@llvm/pr-subscribers-clang-format
Author: None (llvmbot)
Changes
Backport 5fc482cfc0fa70c98e14d64d83dffbf7da03c303
Requested by: @owenca
---
Full diff: https://github.com/llvm/llvm-project/pull/151362.diff
2 Files Affected:
- (modified) clang/lib/Format/IntegerLiteralSeparatorFixer.cpp (+7-4)
- (modified) clang/unittests/Format/IntegerLiteralSeparatorTest.cpp (+3)
```diff
diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
index 87823ae32b113..aa752f5e3148a 100644
--- a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -45,15 +45,18 @@ std::pair
IntegerLiteralSeparatorFixer::process(const Environment &Env,
const FormatStyle &Style) {
switch (Style.Language) {
- case FormatStyle::LK_Cpp:
- case FormatStyle::LK_ObjC:
-Separator = '\'';
-break;
case FormatStyle::LK_CSharp:
case FormatStyle::LK_Java:
case FormatStyle::LK_JavaScript:
Separator = '_';
break;
+ case FormatStyle::LK_Cpp:
+ case FormatStyle::LK_ObjC:
+if (Style.Standard >= FormatStyle::LS_Cpp14) {
+ Separator = '\'';
+ break;
+}
+[[fallthrough]];
default:
return {};
}
diff --git a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
index b1e42e924e05c..67b9cc9037905 100644
--- a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
+++ b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
@@ -83,6 +83,9 @@ TEST_F(IntegerLiteralSeparatorTest, SingleQuoteAsSeparator) {
"d = 5678_km;\n"
"h = 0xDEF_u16;",
Style);
+
+ Style.Standard = FormatStyle::LS_Cpp11;
+ verifyFormat("ld = 1234L;", Style);
}
TEST_F(IntegerLiteralSeparatorTest, UnderscoreAsSeparator) {
```
https://github.com/llvm/llvm-project/pull/151362
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/21.x: [clang-format] Disable IntegerLiteralSeparator for C++ before c++14 (#151273) (PR #151362)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/151362
Backport 5fc482cfc0fa70c98e14d64d83dffbf7da03c303
Requested by: @owenca
>From dd68262f577de21b19dda270e5b97e2327aa8186 Mon Sep 17 00:00:00 2001
From: Owen Pan
Date: Wed, 30 Jul 2025 09:43:46 -0700
Subject: [PATCH] [clang-format] Disable IntegerLiteralSeparator for C++ before
c++14 (#151273)
Fixes #151102
(cherry picked from commit 5fc482cfc0fa70c98e14d64d83dffbf7da03c303)
---
clang/lib/Format/IntegerLiteralSeparatorFixer.cpp | 11 +++
.../unittests/Format/IntegerLiteralSeparatorTest.cpp | 3 +++
2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
index 87823ae32b113..aa752f5e3148a 100644
--- a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -45,15 +45,18 @@ std::pair
IntegerLiteralSeparatorFixer::process(const Environment &Env,
const FormatStyle &Style) {
switch (Style.Language) {
- case FormatStyle::LK_Cpp:
- case FormatStyle::LK_ObjC:
-Separator = '\'';
-break;
case FormatStyle::LK_CSharp:
case FormatStyle::LK_Java:
case FormatStyle::LK_JavaScript:
Separator = '_';
break;
+ case FormatStyle::LK_Cpp:
+ case FormatStyle::LK_ObjC:
+if (Style.Standard >= FormatStyle::LS_Cpp14) {
+ Separator = '\'';
+ break;
+}
+[[fallthrough]];
default:
return {};
}
diff --git a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
index b1e42e924e05c..67b9cc9037905 100644
--- a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
+++ b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
@@ -83,6 +83,9 @@ TEST_F(IntegerLiteralSeparatorTest, SingleQuoteAsSeparator) {
"d = 5678_km;\n"
"h = 0xDEF_u16;",
Style);
+
+ Style.Standard = FormatStyle::LS_Cpp11;
+ verifyFormat("ld = 1234L;", Style);
}
TEST_F(IntegerLiteralSeparatorTest, UnderscoreAsSeparator) {
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/21.x: [clang-format] Disable IntegerLiteralSeparator for C++ before c++14 (#151273) (PR #151362)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/151362 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/21.x: [clang-format] Disable IntegerLiteralSeparator for C++ before c++14 (#151273) (PR #151362)
llvmbot wrote: @HazardyKnusperkeks What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/151362 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libcxxabi] [llvm] release/21.x: [libc++][hardening] Introduce assertion semantics. (#149459) (PR #151095)
var-const wrote: @tru Friendly ping. :) The CI job seems stuck but IIUC, it succeeded, it's just that it's stuck on reporting itself as done (@ldionne please correct me if I'm wrong here). https://github.com/llvm/llvm-project/pull/151095 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor atanhf implementation to header-only in src/__support/math folder. (PR #151399)
https://github.com/bassiounix updated
https://github.com/llvm/llvm-project/pull/151399
>From 2fd12f451dbc98bc078fc3d86e71227e66950e3d Mon Sep 17 00:00:00 2001
From: bassiounix
Date: Thu, 31 Jul 2025 00:41:13 +0300
Subject: [PATCH] [libc][math] Refactor atanhf implementation to header-only in
src/__support/math folder.
---
libc/shared/math.h| 1 +
libc/shared/math/atanhf.h | 23 ++
libc/src/__support/math/CMakeLists.txt| 11 +++
libc/src/__support/math/atanhf.h | 76 +++
libc/src/math/generic/CMakeLists.txt | 5 +-
libc/src/math/generic/atanhf.cpp | 56 +-
libc/test/shared/CMakeLists.txt | 1 +
libc/test/shared/shared_math_test.cpp | 1 +
.../llvm-project-overlay/libc/BUILD.bazel | 20 +++--
9 files changed, 129 insertions(+), 65 deletions(-)
create mode 100644 libc/shared/math/atanhf.h
create mode 100644 libc/src/__support/math/atanhf.h
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 6cb583c08dedd..ddf219ece8ff1 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -28,6 +28,7 @@
#include "math/atan2f128.h"
#include "math/atanf.h"
#include "math/atanf16.h"
+#include "math/atanhf.h"
#include "math/erff.h"
#include "math/exp.h"
#include "math/exp10.h"
diff --git a/libc/shared/math/atanhf.h b/libc/shared/math/atanhf.h
new file mode 100644
index 0000000000000..763fb3e00a659
--- /dev/null
+++ b/libc/shared/math/atanhf.h
@@ -0,0 +1,23 @@
+//===-- Shared atanhf function ----------------------------------*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATANHF_H
+#define LLVM_LIBC_SHARED_MATH_ATANHF_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atanhf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atanhf;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ATANHF_H
diff --git a/libc/src/__support/math/CMakeLists.txt
b/libc/src/__support/math/CMakeLists.txt
index caafdc2cbf1d6..500dd9de2c555 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -275,6 +275,17 @@ add_header_library(
libc.src.__support.macros.optimization
)
+add_header_library(
+ atanhf
+ HDRS
+atanhf.h
+ DEPENDS
+.acoshf_utils
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.fenv_impl
+libc.src.__support.macros.optimization
+)
+
add_header_library(
asinf
HDRS
diff --git a/libc/src/__support/math/atanhf.h b/libc/src/__support/math/atanhf.h
new file mode 100644
index 0000000000000..b3ee5bbb4d408
--- /dev/null
+++ b/libc/src/__support/math/atanhf.h
@@ -0,0 +1,76 @@
+//===-- Implementation header for atanhf ------------------------*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF_H
+
+#include "acoshf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float atanhf(float x) {
+ using namespace acoshf_internal;
+ using FPBits = typename fputil::FPBits<float>;
+
+ FPBits xbits(x);
+ Sign sign = xbits.sign();
+ uint32_t x_abs = xbits.abs().uintval();
+
+ // |x| >= 1.0
+ if (LIBC_UNLIKELY(x_abs >= 0x3F80'0000U)) {
+if (xbits.is_nan()) {
+ if (xbits.is_signaling_nan()) {
+fputil::raise_except_if_required(FE_INVALID);
+return FPBits::quiet_nan().get_val();
+ }
+ return x;
+}
+// |x| == 1.0
+if (x_abs == 0x3F80'0000U) {
+ fputil::set_errno_if_required(ERANGE);
+ fputil::raise_except_if_required(FE_DIVBYZERO);
+ return FPBits::inf(sign).get_val();
+} else {
+ fputil::set_errno_if_required(EDOM);
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+}
+ }
+
+ // |x| < ~0.10
+ if (LIBC_UNLIKELY(x_abs <= 0x3dcc'0000U)) {
+// |x| <= 2^-26
+if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
+ return static_cast<float>(LIBC_UNLIKELY(x_abs == 0)
+? x
+: (x + 0x1.5p-2 * x * x * x));
+}
+
+double xdbl = x;
+double x2
[llvm-branch-commits] [llvm] [MemProf] Fix FileCheck prefix in the histogram test. (PR #150506)
https://github.com/snehasish updated
https://github.com/llvm/llvm-project/pull/150506
>From 385e2e93795c3520760c7592a45f256aaad0694b Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Thu, 24 Jul 2025 06:25:00 +0000
Subject: [PATCH] Fix FileCheck prefix in the histogram test.
---
.../memprof-padding-histogram.test| 152 +-
1 file changed, 76 insertions(+), 76 deletions(-)
diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: -
CHECK: Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT Callstack:
+CHECK-NEXT -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce
[llvm-branch-commits] [llvm] [MemProf] Fix FileCheck prefix in the histogram test. (PR #150506)
https://github.com/snehasish updated
https://github.com/llvm/llvm-project/pull/150506
>From 385e2e93795c3520760c7592a45f256aaad0694b Mon Sep 17 00:00:00 2001
From: Snehasish Kumar
Date: Thu, 24 Jul 2025 06:25:00 +0000
Subject: [PATCH] Fix FileCheck prefix in the histogram test.
---
.../memprof-padding-histogram.test| 152 +-
1 file changed, 76 insertions(+), 76 deletions(-)
diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
CHECK-NEXT: -
CHECK: Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT Callstack:
-CHEC-NEXT -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT Callstack:
+CHECK-NEXT -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce
[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)
@@ -344,6 +350,36 @@ void
ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile &Obj,
exitWithError("no executable segment found", FileName);
}
+uint64_t ProfiledBinary::CanonicalizeNonTextAddress(uint64_t Address) {
+ uint64_t FileOffset = 0;
+ auto MMapIter = NonTextMMapEvents.lower_bound(Address);
+ if (MMapIter == NonTextMMapEvents.end())
+return Address; // No non-text mmap event found, return the address as is.
+
+ const auto &MMapEvent = MMapIter->second;
+
+ // If the address is within the non-text mmap event, calculates its file
mingmingl-llvm wrote:
done.
https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)
@@ -1027,6 +1027,20 @@ class FunctionSamples {
return VirtualCallsiteTypeCounts[mapIRLocToProfileLoc(Loc)];
}
+ /// At location \p Loc, add a type sample for the given \p Type with
+ /// \p Count. This function uses saturating arithmetic to clamp the result to
+ /// maximum uint64_t (the counter type) and returns counter_overflow to
caller
+ /// if the actual result is larger than maximum uint64_t.
+ sampleprof_error addTypeSamplesAt(const LineLocation &Loc, FunctionId Type,
+uint64_t Count) {
+auto &TypeCounts = getTypeSamplesAt(Loc);
+bool Overflowed = false;
+TypeCounts[Type] = SaturatingMultiplyAdd(Count, /* Weight= */ (uint64_t)1,
mingmingl-llvm wrote:
As clarified offline, the `SaturatingMultiplyAdd` will clamp the result if
overflow happens, and we insert the return value of `SaturatingMultiplyAdd`.
From this perspective, counter_overflow is more of informative warning as
opposed to a real error that wraps around a large unsigned integer into another
value.
I updated the comment to make this more explicit, and will probably prepare a
separate change around the warning handling (in SampleProf or InstrProf).
https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)
@@ -611,23 +635,11 @@ class PerfScriptReader : public PerfReaderBase {
static SmallVector TempFileCleanups;
protected:
- // The parsed MMap event
- struct MMapEvent {
-int64_t PID = 0;
-uint64_t Address = 0;
-uint64_t Size = 0;
-uint64_t Offset = 0;
-StringRef BinaryPath;
- };
-
// Check whether a given line is LBR sample
static bool isLBRSample(StringRef Line);
// Check whether a given line is MMAP event
static bool isMMapEvent(StringRef Line);
- // Parse a single line of a PERF_RECORD_MMAP event looking for a
- // mapping between the binary name and its memory layout.
- static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
-MMapEvent &MMap);
+
mingmingl-llvm wrote:
done.
https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm][AsmPrinter] Emit call graph section (PR #87576)
https://github.com/Prabhuk updated
https://github.com/llvm/llvm-project/pull/87576
>From 6b67376bd5e1f21606017c83cc67f2186ba36a33 Mon Sep 17 00:00:00 2001
From: Necip Fazil Yildiran
Date: Thu, 13 Mar 2025 01:41:04 +0000
Subject: [PATCH 1/6] Updated the test as reviewers suggested.
Created using spr 1.3.6-beta.1
---
llvm/test/CodeGen/X86/call-graph-section.ll | 66 +++
llvm/test/CodeGen/call-graph-section.ll | 73 -
2 files changed, 66 insertions(+), 73 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/call-graph-section.ll
delete mode 100644 llvm/test/CodeGen/call-graph-section.ll
diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll
b/llvm/test/CodeGen/X86/call-graph-section.ll
new file mode 100644
index 0000000000000..a77a2b8051ed3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/call-graph-section.ll
@@ -0,0 +1,66 @@
+;; Tests that we store the type identifiers in .callgraph section of the
binary.
+
+; RUN: llc --call-graph-section -filetype=obj -o - < %s | \
+; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @foo() #0 !type !4 {
+entry:
+ ret void
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @bar(i8 signext %a) #0 !type !5 {
+entry:
+ %a.addr = alloca i8, align 1
+ store i8 %a, ptr %a.addr, align 1
+ ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local ptr @baz(ptr %a) #0 !type !6 {
+entry:
+ %a.addr = alloca ptr, align 8
+ store ptr %a, ptr %a.addr, align 8
+ ret ptr null
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @main() #0 !type !7 {
+entry:
+ %retval = alloca i32, align 4
+ %fp_foo = alloca ptr, align 8
+ %a = alloca i8, align 1
+ %fp_bar = alloca ptr, align 8
+ %fp_baz = alloca ptr, align 8
+ store i32 0, ptr %retval, align 4
+ store ptr @foo, ptr %fp_foo, align 8
+ %0 = load ptr, ptr %fp_foo, align 8
+ call void (...) %0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
+ store ptr @bar, ptr %fp_bar, align 8
+ %1 = load ptr, ptr %fp_bar, align 8
+ %2 = load i8, ptr %a, align 1
+ %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata
!"_ZTSFicE.generalized") ]
+ store ptr @baz, ptr %fp_baz, align 8
+ %3 = load ptr, ptr %fp_baz, align 8
+ %call1 = call ptr %3(ptr %a) [ "callee_type"(metadata
!"_ZTSFPvS_E.generalized") ]
+ call void @foo() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
+ %4 = load i8, ptr %a, align 1
+ %call2 = call i32 @bar(i8 signext %4) [ "callee_type"(metadata
!"_ZTSFicE.generalized") ]
+ %call3 = call ptr @baz(ptr %a) [ "callee_type"(metadata
!"_ZTSFPvS_E.generalized") ]
+ ret void
+}
+
+;; Check that the numeric type id (md5 hash) for the below type ids are emitted
+;; to the callgraph section.
+
+; CHECK: Hex dump of section '.callgraph':
+
+; CHECK-DAG: 2444f731 f5eecb3e
+!4 = !{i64 0, !"_ZTSFvE.generalized"}
+; CHECK-DAG: 5486bc59 814b8e30
+!5 = !{i64 0, !"_ZTSFicE.generalized"}
+; CHECK-DAG: 7ade6814 f897fd77
+!6 = !{i64 0, !"_ZTSFPvS_E.generalized"}
+; CHECK-DAG: caaf769a 600968fa
+!7 = !{i64 0, !"_ZTSFiE.generalized"}
diff --git a/llvm/test/CodeGen/call-graph-section.ll
b/llvm/test/CodeGen/call-graph-section.ll
deleted file mode 100644
index bb158d11e82c9..0000000000000
--- a/llvm/test/CodeGen/call-graph-section.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; Tests that we store the type identifiers in .callgraph section of the binary.
-
-; RUN: llc --call-graph-section -filetype=obj -o - < %s | \
-; RUN: llvm-readelf -x .callgraph - | FileCheck %s
-
-target triple = "x86_64-unknown-linux-gnu"
-
-define dso_local void @foo() #0 !type !4 {
-entry:
- ret void
-}
-
-define dso_local i32 @bar(i8 signext %a) #0 !type !5 {
-entry:
- %a.addr = alloca i8, align 1
- store i8 %a, i8* %a.addr, align 1
- ret i32 0
-}
-
-define dso_local i32* @baz(i8* %a) #0 !type !6 {
-entry:
- %a.addr = alloca i8*, align 8
- store i8* %a, i8** %a.addr, align 8
- ret i32* null
-}
-
-define dso_local i32 @main() #0 !type !7 {
-entry:
- %retval = alloca i32, align 4
- %fp_foo = alloca void (...)*, align 8
- %a = alloca i8, align 1
- %fp_bar = alloca i32 (i8)*, align 8
- %fp_baz = alloca i32* (i8*)*, align 8
- store i32 0, i32* %retval, align 4
- store void (...)* bitcast (void ()* @foo to void (...)*), void (...)**
%fp_foo, align 8
- %0 = load void (...)*, void (...)** %fp_foo, align 8
- call void (...) %0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
- store i32 (i8)* @bar, i32 (i8)** %fp_bar, align 8
- %1 = load i32 (i8)*, i32 (i8)** %fp_bar, align 8
- %2 = load i8, i8* %a, align 1
- %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata
!"_ZTSFicE.generalized") ]
- store i32* (i8*)* @baz, i32* (i8*)** %fp_baz, align 8
- %3 = load i32* (i8*)*, i32* (i8*)** %fp_baz, align 8
- %call1 = call i32* %3(i8* %a) [ "callee_type"(metadata
!"_ZTSFPvS_E.generalized") ]
- call void @foo() [ "callee_type"(meta
[llvm-branch-commits] [clang] [clang] Introduce CallGraphSection option (PR #117037)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/117037 >From 6a12be2c5b60a95a06875b0b2c4f14228d1fa882 Mon Sep 17 00:00:00 2001 From: prabhukr Date: Wed, 12 Mar 2025 23:30:01 + Subject: [PATCH 1/2] Fix EOF newlines. Created using spr 1.3.6-beta.1 --- clang/test/Driver/call-graph-section.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/call-graph-section.c b/clang/test/Driver/call-graph-section.c index 108446729d857..5832aa6754137 100644 --- a/clang/test/Driver/call-graph-section.c +++ b/clang/test/Driver/call-graph-section.c @@ -2,4 +2,4 @@ // RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s // CALL-GRAPH-SECTION: "-fcall-graph-section" -// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section" \ No newline at end of file +// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section" >From c67f714eaab9a7f1e4d2d76da28641b05710231d Mon Sep 17 00:00:00 2001 From: prabhukr Date: Mon, 21 Jul 2025 23:53:52 + Subject: [PATCH 2/2] Fix review comment on test file. 
Created using spr 1.3.6-beta.1 --- clang/test/Driver/call-graph-section.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Driver/call-graph-section.c b/clang/test/Driver/call-graph-section.c index 5832aa6754137..563f36de4119e 100644 --- a/clang/test/Driver/call-graph-section.c +++ b/clang/test/Driver/call-graph-section.c @@ -1,5 +1,5 @@ -// RUN: %clang -### -S -fcall-graph-section %s 2>&1 | FileCheck --check-prefix=CALL-GRAPH-SECTION %s -// RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s +// RUN: %clang -### -fcall-graph-section %s 2>&1 | FileCheck --check-prefix=CALL-GRAPH-SECTION %s +// RUN: %clang -### -fcall-graph-section -fno-call-graph-section %s 2>&1 | FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s // CALL-GRAPH-SECTION: "-fcall-graph-section" // NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section" ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] Introduce CallGraphSection option (PR #117037)
https://github.com/Prabhuk updated https://github.com/llvm/llvm-project/pull/117037 >From 6a12be2c5b60a95a06875b0b2c4f14228d1fa882 Mon Sep 17 00:00:00 2001 From: prabhukr Date: Wed, 12 Mar 2025 23:30:01 + Subject: [PATCH 1/2] Fix EOF newlines. Created using spr 1.3.6-beta.1 --- clang/test/Driver/call-graph-section.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/call-graph-section.c b/clang/test/Driver/call-graph-section.c index 108446729d857..5832aa6754137 100644 --- a/clang/test/Driver/call-graph-section.c +++ b/clang/test/Driver/call-graph-section.c @@ -2,4 +2,4 @@ // RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s // CALL-GRAPH-SECTION: "-fcall-graph-section" -// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section" \ No newline at end of file +// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section" >From c67f714eaab9a7f1e4d2d76da28641b05710231d Mon Sep 17 00:00:00 2001 From: prabhukr Date: Mon, 21 Jul 2025 23:53:52 + Subject: [PATCH 2/2] Fix review comment on test file. 
Created using spr 1.3.6-beta.1 --- clang/test/Driver/call-graph-section.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Driver/call-graph-section.c b/clang/test/Driver/call-graph-section.c index 5832aa6754137..563f36de4119e 100644 --- a/clang/test/Driver/call-graph-section.c +++ b/clang/test/Driver/call-graph-section.c @@ -1,5 +1,5 @@ -// RUN: %clang -### -S -fcall-graph-section %s 2>&1 | FileCheck --check-prefix=CALL-GRAPH-SECTION %s -// RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s +// RUN: %clang -### -fcall-graph-section %s 2>&1 | FileCheck --check-prefix=CALL-GRAPH-SECTION %s +// RUN: %clang -### -fcall-graph-section -fno-call-graph-section %s 2>&1 | FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s // CALL-GRAPH-SECTION: "-fcall-graph-section" // NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section" ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] callee_type metadata for indirect calls (PR #117036)
https://github.com/Prabhuk updated
https://github.com/llvm/llvm-project/pull/117036
>From b7fbe09b32ff02d4f7c52d82fbf8b5cd28138852 Mon Sep 17 00:00:00 2001
From: prabhukr
Date: Wed, 23 Apr 2025 04:05:47 +0000
Subject: [PATCH] Address review comments.
Created using spr 1.3.6-beta.1
---
clang/lib/CodeGen/CGCall.cpp| 8
clang/lib/CodeGen/CodeGenModule.cpp | 10 +-
clang/lib/CodeGen/CodeGenModule.h | 4 ++--
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 185ee1a970aac..d8ab7140f7943 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5780,19 +5780,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo
&CallInfo,
if (callOrInvoke) {
*callOrInvoke = CI;
if (CGM.getCodeGenOpts().CallGraphSection) {
- assert((TargetDecl && TargetDecl->getFunctionType() ||
- Callee.getAbstractInfo().getCalleeFunctionProtoType()) &&
- "cannot find callsite type");
QualType CST;
if (TargetDecl && TargetDecl->getFunctionType())
CST = QualType(TargetDecl->getFunctionType(), 0);
else if (const auto *FPT =
Callee.getAbstractInfo().getCalleeFunctionProtoType())
CST = QualType(FPT, 0);
+ else
+llvm_unreachable(
+"Cannot find the callee type to generate callee_type metadata.");
// Set type identifier metadata of indirect calls for call graph section.
if (!CST.isNull())
-CGM.CreateCalleeTypeMetadataForIcall(CST, *callOrInvoke);
+CGM.createCalleeTypeMetadataForIcall(CST, *callOrInvoke);
}
}
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp
b/clang/lib/CodeGen/CodeGenModule.cpp
index 43cd2405571cf..2fc99639a75cb 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2654,7 +2654,7 @@ void
CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
// Skip available_externally functions. They won't be codegen'ed in the
// current module anyway.
if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally)
-CreateFunctionTypeMetadataForIcall(FD, F);
+createFunctionTypeMetadataForIcall(FD, F);
}
}
@@ -2868,7 +2868,7 @@ static bool hasExistingGeneralizedTypeMD(llvm::Function
*F) {
return MD->hasGeneralizedMDString();
}
-void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
+void CodeGenModule::createFunctionTypeMetadataForIcall(const FunctionDecl *FD,
llvm::Function *F) {
if (CodeGenOpts.CallGraphSection && !hasExistingGeneralizedTypeMD(F) &&
(!F->hasLocalLinkage() ||
@@ -2898,7 +2898,7 @@ void
CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId));
}
-void CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT,
+void CodeGenModule::createCalleeTypeMetadataForIcall(const QualType &QT,
llvm::CallBase *CB) {
// Only if needed for call graph section and only for indirect calls.
if (!CodeGenOpts.CallGraphSection || !CB->isIndirectCall())
@@ -2909,7 +2909,7 @@ void
CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT,
getLLVMContext(), {llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
llvm::Type::getInt64Ty(getLLVMContext()), 0)),
TypeIdMD});
- llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), { TypeTuple });
+ llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), {TypeTuple});
CB->setMetadata(llvm::LLVMContext::MD_callee_type, MDN);
}
@@ -3041,7 +3041,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD,
llvm::Function *F,
// jump table.
if (!CodeGenOpts.SanitizeCfiCrossDso ||
!CodeGenOpts.SanitizeCfiCanonicalJumpTables)
-CreateFunctionTypeMetadataForIcall(FD, F);
+createFunctionTypeMetadataForIcall(FD, F);
if (LangOpts.Sanitize.has(SanitizerKind::KCFI))
setKCFIType(FD, F);
diff --git a/clang/lib/CodeGen/CodeGenModule.h
b/clang/lib/CodeGen/CodeGenModule.h
index dfbe4388349dd..4b53f0f241b52 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1619,11 +1619,11 @@ class CodeGenModule : public CodeGenTypeCache {
llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T);
/// Create and attach type metadata to the given function.
- void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
+ void createFunctionTypeMetadataForIcall(const FunctionDecl *FD,
llvm::Function *F);
/// Create and attach type metadata to the given call.
- void CreateCalleeTypeMetadataForIcall(const QualType &QT, llvm::CallBase
*CB);
+ void createCa
[llvm-branch-commits] [llvm] [llvm][AsmPrinter] Emit call graph section (PR #87576)
https://github.com/Prabhuk updated
https://github.com/llvm/llvm-project/pull/87576
>From 6b67376bd5e1f21606017c83cc67f2186ba36a33 Mon Sep 17 00:00:00 2001
From: Necip Fazil Yildiran
Date: Thu, 13 Mar 2025 01:41:04 +
Subject: [PATCH 1/6] Updated the test as reviewers suggested.
Created using spr 1.3.6-beta.1
---
llvm/test/CodeGen/X86/call-graph-section.ll | 66 +++
llvm/test/CodeGen/call-graph-section.ll | 73 -
2 files changed, 66 insertions(+), 73 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/call-graph-section.ll
delete mode 100644 llvm/test/CodeGen/call-graph-section.ll
diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll
b/llvm/test/CodeGen/X86/call-graph-section.ll
new file mode 100644
index 0..a77a2b8051ed3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/call-graph-section.ll
@@ -0,0 +1,66 @@
+;; Tests that we store the type identifiers in .callgraph section of the
binary.
+
+; RUN: llc --call-graph-section -filetype=obj -o - < %s | \
+; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @foo() #0 !type !4 {
+entry:
+ ret void
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @bar(i8 signext %a) #0 !type !5 {
+entry:
+ %a.addr = alloca i8, align 1
+ store i8 %a, ptr %a.addr, align 1
+ ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local ptr @baz(ptr %a) #0 !type !6 {
+entry:
+ %a.addr = alloca ptr, align 8
+ store ptr %a, ptr %a.addr, align 8
+ ret ptr null
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @main() #0 !type !7 {
+entry:
+ %retval = alloca i32, align 4
+ %fp_foo = alloca ptr, align 8
+ %a = alloca i8, align 1
+ %fp_bar = alloca ptr, align 8
+ %fp_baz = alloca ptr, align 8
+ store i32 0, ptr %retval, align 4
+ store ptr @foo, ptr %fp_foo, align 8
+ %0 = load ptr, ptr %fp_foo, align 8
+ call void (...) %0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
+ store ptr @bar, ptr %fp_bar, align 8
+ %1 = load ptr, ptr %fp_bar, align 8
+ %2 = load i8, ptr %a, align 1
+ %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata
!"_ZTSFicE.generalized") ]
+ store ptr @baz, ptr %fp_baz, align 8
+ %3 = load ptr, ptr %fp_baz, align 8
+ %call1 = call ptr %3(ptr %a) [ "callee_type"(metadata
!"_ZTSFPvS_E.generalized") ]
+ call void @foo() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
+ %4 = load i8, ptr %a, align 1
+ %call2 = call i32 @bar(i8 signext %4) [ "callee_type"(metadata
!"_ZTSFicE.generalized") ]
+ %call3 = call ptr @baz(ptr %a) [ "callee_type"(metadata
!"_ZTSFPvS_E.generalized") ]
+ ret void
+}
+
+;; Check that the numeric type id (md5 hash) for the below type ids are emitted
+;; to the callgraph section.
+
+; CHECK: Hex dump of section '.callgraph':
+
+; CHECK-DAG: 2444f731 f5eecb3e
+!4 = !{i64 0, !"_ZTSFvE.generalized"}
+; CHECK-DAG: 5486bc59 814b8e30
+!5 = !{i64 0, !"_ZTSFicE.generalized"}
+; CHECK-DAG: 7ade6814 f897fd77
+!6 = !{i64 0, !"_ZTSFPvS_E.generalized"}
+; CHECK-DAG: caaf769a 600968fa
+!7 = !{i64 0, !"_ZTSFiE.generalized"}
diff --git a/llvm/test/CodeGen/call-graph-section.ll
b/llvm/test/CodeGen/call-graph-section.ll
deleted file mode 100644
index bb158d11e82c9..0
--- a/llvm/test/CodeGen/call-graph-section.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; Tests that we store the type identifiers in .callgraph section of the binary.
-
-; RUN: llc --call-graph-section -filetype=obj -o - < %s | \
-; RUN: llvm-readelf -x .callgraph - | FileCheck %s
-
-target triple = "x86_64-unknown-linux-gnu"
-
-define dso_local void @foo() #0 !type !4 {
-entry:
- ret void
-}
-
-define dso_local i32 @bar(i8 signext %a) #0 !type !5 {
-entry:
- %a.addr = alloca i8, align 1
- store i8 %a, i8* %a.addr, align 1
- ret i32 0
-}
-
-define dso_local i32* @baz(i8* %a) #0 !type !6 {
-entry:
- %a.addr = alloca i8*, align 8
- store i8* %a, i8** %a.addr, align 8
- ret i32* null
-}
-
-define dso_local i32 @main() #0 !type !7 {
-entry:
- %retval = alloca i32, align 4
- %fp_foo = alloca void (...)*, align 8
- %a = alloca i8, align 1
- %fp_bar = alloca i32 (i8)*, align 8
- %fp_baz = alloca i32* (i8*)*, align 8
- store i32 0, i32* %retval, align 4
- store void (...)* bitcast (void ()* @foo to void (...)*), void (...)**
%fp_foo, align 8
- %0 = load void (...)*, void (...)** %fp_foo, align 8
- call void (...) %0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
- store i32 (i8)* @bar, i32 (i8)** %fp_bar, align 8
- %1 = load i32 (i8)*, i32 (i8)** %fp_bar, align 8
- %2 = load i8, i8* %a, align 1
- %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata
!"_ZTSFicE.generalized") ]
- store i32* (i8*)* @baz, i32* (i8*)** %fp_baz, align 8
- %3 = load i32* (i8*)*, i32* (i8*)** %fp_baz, align 8
- %call1 = call i32* %3(i8* %a) [ "callee_type"(metadata
!"_ZTSFPvS_E.generalized") ]
- call void @foo() [ "callee_type"(meta
[llvm-branch-commits] callgraph make flag experimental (PR #151402)
https://github.com/Prabhuk created https://github.com/llvm/llvm-project/pull/151402 None ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang] callee_type metadata for indirect calls (PR #117036)
https://github.com/Prabhuk updated
https://github.com/llvm/llvm-project/pull/117036
>From b7fbe09b32ff02d4f7c52d82fbf8b5cd28138852 Mon Sep 17 00:00:00 2001
From: prabhukr
Date: Wed, 23 Apr 2025 04:05:47 +
Subject: [PATCH] Address review comments.
Created using spr 1.3.6-beta.1
---
clang/lib/CodeGen/CGCall.cpp| 8
clang/lib/CodeGen/CodeGenModule.cpp | 10 +-
clang/lib/CodeGen/CodeGenModule.h | 4 ++--
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 185ee1a970aac..d8ab7140f7943 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5780,19 +5780,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo
&CallInfo,
if (callOrInvoke) {
*callOrInvoke = CI;
if (CGM.getCodeGenOpts().CallGraphSection) {
- assert((TargetDecl && TargetDecl->getFunctionType() ||
- Callee.getAbstractInfo().getCalleeFunctionProtoType()) &&
- "cannot find callsite type");
QualType CST;
if (TargetDecl && TargetDecl->getFunctionType())
CST = QualType(TargetDecl->getFunctionType(), 0);
else if (const auto *FPT =
Callee.getAbstractInfo().getCalleeFunctionProtoType())
CST = QualType(FPT, 0);
+ else
+llvm_unreachable(
+"Cannot find the callee type to generate callee_type metadata.");
// Set type identifier metadata of indirect calls for call graph section.
if (!CST.isNull())
-CGM.CreateCalleeTypeMetadataForIcall(CST, *callOrInvoke);
+CGM.createCalleeTypeMetadataForIcall(CST, *callOrInvoke);
}
}
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp
b/clang/lib/CodeGen/CodeGenModule.cpp
index 43cd2405571cf..2fc99639a75cb 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2654,7 +2654,7 @@ void
CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
// Skip available_externally functions. They won't be codegen'ed in the
// current module anyway.
if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally)
-CreateFunctionTypeMetadataForIcall(FD, F);
+createFunctionTypeMetadataForIcall(FD, F);
}
}
@@ -2868,7 +2868,7 @@ static bool hasExistingGeneralizedTypeMD(llvm::Function
*F) {
return MD->hasGeneralizedMDString();
}
-void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
+void CodeGenModule::createFunctionTypeMetadataForIcall(const FunctionDecl *FD,
llvm::Function *F) {
if (CodeGenOpts.CallGraphSection && !hasExistingGeneralizedTypeMD(F) &&
(!F->hasLocalLinkage() ||
@@ -2898,7 +2898,7 @@ void
CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId));
}
-void CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT,
+void CodeGenModule::createCalleeTypeMetadataForIcall(const QualType &QT,
llvm::CallBase *CB) {
// Only if needed for call graph section and only for indirect calls.
if (!CodeGenOpts.CallGraphSection || !CB->isIndirectCall())
@@ -2909,7 +2909,7 @@ void
CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT,
getLLVMContext(), {llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
llvm::Type::getInt64Ty(getLLVMContext()), 0)),
TypeIdMD});
- llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), { TypeTuple });
+ llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), {TypeTuple});
CB->setMetadata(llvm::LLVMContext::MD_callee_type, MDN);
}
@@ -3041,7 +3041,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD,
llvm::Function *F,
// jump table.
if (!CodeGenOpts.SanitizeCfiCrossDso ||
!CodeGenOpts.SanitizeCfiCanonicalJumpTables)
-CreateFunctionTypeMetadataForIcall(FD, F);
+createFunctionTypeMetadataForIcall(FD, F);
if (LangOpts.Sanitize.has(SanitizerKind::KCFI))
setKCFIType(FD, F);
diff --git a/clang/lib/CodeGen/CodeGenModule.h
b/clang/lib/CodeGen/CodeGenModule.h
index dfbe4388349dd..4b53f0f241b52 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1619,11 +1619,11 @@ class CodeGenModule : public CodeGenTypeCache {
llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T);
/// Create and attach type metadata to the given function.
- void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
+ void createFunctionTypeMetadataForIcall(const FunctionDecl *FD,
llvm::Function *F);
/// Create and attach type metadata to the given call.
- void CreateCalleeTypeMetadataForIcall(const QualType &QT, llvm::CallBase
*CB);
+ void createCa
[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add nounwind to hexagon-strcpy.ll (#151293) (PR #151458)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/151458
Backport 3796efb
Requested by: @svs-quic
>From 96ebfde394f93570db6817e096d3fa95a38aa2d6 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli
Date: Thu, 31 Jul 2025 11:13:55 +0530
Subject: [PATCH] [Hexagon] Add nounwind to hexagon-strcpy.ll (#151293)
The test does not check for anything related to cfi information so we
don't really need them in the test checks. Also it looks like there were
some failures on the Alpine Linux builders due to the placement of the
cfi information in the output assembly.
I have also changed `-march` to `-mtriple` in the run line similar to
2208c97
(cherry picked from commit 3796efb5dc08d4596aa986bd03a1290c43e2e995)
---
llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll | 12 +++-
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
b/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
index b23366bc11aca..f5430dfea5865 100644
--- a/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
+++ b/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
@@ -1,20 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 5
-; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=hexagon -verify-machineinstrs < %s | FileCheck %s
@.str = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM, 3'RD
STRING\00", align 1
@.str1 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
-; Function Attrs: nounwind
declare i32 @printf(i8* nocapture readonly, ...)
; Function Attrs: nounwind
-define i32 @main() {
+define i32 @main() nounwind {
; CHECK-LABEL: main:
-; CHECK: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT:.cfi_def_cfa r30, 8
-; CHECK-NEXT:.cfi_offset r31, -4
-; CHECK-NEXT:.cfi_offset r30, -8
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT:{
; CHECK-NEXT: r0 = ##.L.str1
; CHECK-NEXT: r3:2 = CONST64(#2325073635944967245)
@@ -53,5 +48,4 @@ entry:
ret i32 0
}
-; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly,
i32, i32, i1)
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add nounwind to hexagon-strcpy.ll (#151293) (PR #151458)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/151458 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add nounwind to hexagon-strcpy.ll (#151293) (PR #151458)
llvmbot wrote: @androm3da What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/151458 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add nounwind to hexagon-strcpy.ll (#151293) (PR #151458)
llvmbot wrote:
@llvm/pr-subscribers-backend-hexagon
Author: None (llvmbot)
Changes
Backport 3796efb
Requested by: @svs-quic
---
Full diff: https://github.com/llvm/llvm-project/pull/151458.diff
1 Files Affected:
- (modified) llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll (+3-9)
``diff
diff --git a/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
b/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
index b23366bc11aca..f5430dfea5865 100644
--- a/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
+++ b/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
@@ -1,20 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
UTC_ARGS: --version 5
-; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=hexagon -verify-machineinstrs < %s | FileCheck %s
@.str = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM, 3'RD
STRING\00", align 1
@.str1 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
-; Function Attrs: nounwind
declare i32 @printf(i8* nocapture readonly, ...)
; Function Attrs: nounwind
-define i32 @main() {
+define i32 @main() nounwind {
; CHECK-LABEL: main:
-; CHECK: .cfi_startproc
-; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT:.cfi_def_cfa r30, 8
-; CHECK-NEXT:.cfi_offset r31, -4
-; CHECK-NEXT:.cfi_offset r30, -8
+; CHECK: // %bb.0: // %entry
; CHECK-NEXT:{
; CHECK-NEXT: r0 = ##.L.str1
; CHECK-NEXT: r3:2 = CONST64(#2325073635944967245)
@@ -53,5 +48,4 @@ entry:
ret i32 0
}
-; Function Attrs: nounwind
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly,
i32, i32, i1)
``
https://github.com/llvm/llvm-project/pull/151458
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Figure out required AGPR count for inline asm (PR #150910)
@@ -1200,16 +1200,61 @@ AAAMDWavesPerEU
&AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}
-static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
- for (const auto &CI : IA->ParseConstraints()) {
+/// Compute the minimum number of AGPRs required to allocate the inline asm.
+static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
+ const CallBase &Call) {
+ unsigned ArgNo = 0;
+ unsigned ResNo = 0;
+ unsigned AGPRDefCount = 0;
+ unsigned AGPRUseCount = 0;
+ unsigned MaxPhysReg = 0;
+ const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
+
+ for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+Type *Ty = nullptr;
+switch (CI.Type) {
+case InlineAsm::isOutput: {
+ Ty = Call.getType();
+ if (auto *STy = dyn_cast<StructType>(Ty))
+Ty = STy->getElementType(ResNo);
+ ++ResNo;
+ break;
+}
+case InlineAsm::isInput: {
+ Ty = Call.getArgOperand(ArgNo++)->getType();
+ break;
+}
+case InlineAsm::isLabel:
+ continue;
+case InlineAsm::isClobber:
+ // Parse the physical register reference.
+ break;
+}
+
for (StringRef Code : CI.Codes) {
- Code.consume_front("{");
- if (Code.starts_with("a"))
-return true;
+ if (Code.starts_with("a")) {
+// Virtual register, compute number of registers based on the type.
+//
+// We ought to be going through TargetLowering to get the number of
+// registers, but we should avoid the dependence on CodeGen here.
+unsigned RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
+if (CI.Type == InlineAsm::isOutput) {
+ AGPRDefCount += RegCount;
+ if (CI.isEarlyClobber)
+AGPRUseCount += RegCount;
+} else
+ AGPRUseCount += RegCount;
+ } else {
+// Physical register reference
+auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
+if (Kind == 'a')
+ MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
+ }
}
}
- return false;
+ unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
+ return std::min(MaxVirtReg + MaxPhysReg, 256u);
ritter-x2a wrote:
For this code
```
define amdgpu_kernel void @foo() {
call void asm sideeffect "; use $0, $1, $2", "{a16},a,a"(i32 17, <8 x i32>
splat (i32 1), <16 x i32> splat (i32 2))
ret void
}
```
we allocate `; use a16, a[18:25], a[0:15]`, so the asm uses 25 AGPRs (arguably
26 since `a25` is used and `a17` is left out, not sure why it's not allocated
as `a[17:24]`, I'm not aware of alignment requirements for AGPRs).
This function computes 17 (the highest required physical register index + 1) +
24 (the number of virtual registers required) = 41 AGPRs required.
This over-approximation seems worth pointing out in a comment, if it's intended.
https://github.com/llvm/llvm-project/pull/150910
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)
https://github.com/easyonaadit updated
https://github.com/llvm/llvm-project/pull/150170
>From 2d22d224d27438d8d9d0979a5fd937653a1cb8af Mon Sep 17 00:00:00 2001
From: Aaditya
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 25 ++
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 58 +++
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 378 +++
3 files changed, 461 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 878543566f0e3..b91b32457ff86 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
+//===--===//
+
+// Wave Reduction builtins.
+
+//===--===//
+
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
+
//===--===//
// R600-NI only builtins.
//===--===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 7dccf82b1a7a3..c3d9ec5fc8309 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void
CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ default:
+llvm_unreachable("Unknown BuiltinID for wave reduction");
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+return Intrinsic::amdgcn_wave_reduce_add;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+return Intrinsic::amdgcn_wave_reduce_sub;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+return Intrinsic::amdgcn_wave_reduce_min;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+return Intrinsic::amdgcn_wave_reduce_umin;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+return Intrinsic::amdgcn_wave_reduce_max;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+return Intrinsic::amdgcn_wave_reduce_umax;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+return Intrinsic::amdgcn_wave_reduce_and;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+return Intrinsic::amdgcn_wave_reduce_or;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+return Intrinsic::amdgcn_wave_reduce_xor;
+ }
+}
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
llvm::SyncScope::ID SSID;
switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u
[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)
https://github.com/easyonaadit updated
https://github.com/llvm/llvm-project/pull/150170
>From 2d22d224d27438d8d9d0979a5fd937653a1cb8af Mon Sep 17 00:00:00 2001
From: Aaditya
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 25 ++
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 58 +++
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 378 +++
3 files changed, 461 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 878543566f0e3..b91b32457ff86 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
+//===--===//
+
+// Wave Reduction builtins.
+
+//===--===//
+
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
+
//===--===//
// R600-NI only builtins.
//===--===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 7dccf82b1a7a3..c3d9ec5fc8309 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void
CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
}
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ default:
+llvm_unreachable("Unknown BuiltinID for wave reduction");
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+return Intrinsic::amdgcn_wave_reduce_add;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+return Intrinsic::amdgcn_wave_reduce_sub;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+return Intrinsic::amdgcn_wave_reduce_min;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+return Intrinsic::amdgcn_wave_reduce_umin;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+return Intrinsic::amdgcn_wave_reduce_max;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+return Intrinsic::amdgcn_wave_reduce_umax;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+return Intrinsic::amdgcn_wave_reduce_and;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+return Intrinsic::amdgcn_wave_reduce_or;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+return Intrinsic::amdgcn_wave_reduce_xor;
+ }
+}
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
llvm::SyncScope::ID SSID;
switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u
[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 2 (PR #151309)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Aaditya (easyonaadit)
Changes
Supporting Arithmetic Operations: `add`, `sub`
---
Patch is 168.45 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/151309.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+130-25)
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll (+1356)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll (+1663-48)
``diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0f529ef362199..56d8e739b6493 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5107,7 +5107,9 @@ static uint32_t getIdentityValueForWaveReduction(unsigned
Opc) {
case AMDGPU::V_CMP_GT_I64_e64: // max.i64
return std::numeric_limits::min();
case AMDGPU::S_ADD_I32:
+ case AMDGPU::S_ADD_U64_PSEUDO:
case AMDGPU::S_SUB_I32:
+ case AMDGPU::S_SUB_U64_PSEUDO:
case AMDGPU::S_OR_B32:
case AMDGPU::S_XOR_B32:
return std::numeric_limits::min();
@@ -5153,11 +5155,14 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
}
case AMDGPU::S_XOR_B32:
case AMDGPU::S_ADD_I32:
-case AMDGPU::S_SUB_I32: {
+case AMDGPU::S_ADD_U64_PSEUDO:
+case AMDGPU::S_SUB_I32:
+case AMDGPU::S_SUB_U64_PSEUDO: {
const TargetRegisterClass *WaveMaskRegClass = TRI->getWaveMaskRegClass();
const TargetRegisterClass *DstRegClass = MRI.getRegClass(DstReg);
Register ExecMask = MRI.createVirtualRegister(WaveMaskRegClass);
- Register ActiveLanes = MRI.createVirtualRegister(DstRegClass);
+ Register ActiveLanes =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
bool IsWave32 = ST.isWave32();
unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
@@ -5165,39 +5170,39 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
unsigned CountReg =
IsWave32 ? AMDGPU::S_BCNT1_I32_B32 : AMDGPU::S_BCNT1_I32_B64;
- auto Exec =
BuildMI(BB, MI, DL, TII->get(MovOpc), ExecMask).addReg(ExecReg);
- auto NewAccumulator = BuildMI(BB, MI, DL, TII->get(CountReg),
ActiveLanes)
-.addReg(Exec->getOperand(0).getReg());
+ auto NewAccumulator =
+ BuildMI(BB, MI, DL, TII->get(CountReg), ActiveLanes)
+ .addReg(ExecMask);
+
+ switch (Opc) {
+ case AMDGPU::S_XOR_B32: {
+// Performing an XOR operation on a uniform value
+// depends on the parity of the number of active lanes.
+// For even parity, the result will be 0, for odd
+// parity the result will be the same as the input value.
+Register ParityRegister =
+MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- switch (Opc) {
- case AMDGPU::S_XOR_B32: {
-// Performing an XOR operation on a uniform value
-// depends on the parity of the number of active lanes.
-// For even parity, the result will be 0, for odd
-// parity the result will be the same as the input value.
-Register ParityRegister = MRI.createVirtualRegister(DstRegClass);
-
-auto ParityReg =
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_AND_B32), ParityRegister)
.addReg(NewAccumulator->getOperand(0).getReg())
-.addImm(1);
-BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
-.addReg(SrcReg)
-.addReg(ParityReg->getOperand(0).getReg());
-break;
- }
+.addImm(1)
+.setOperandDead(3); // Dead scc
+BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
+.addReg(SrcReg)
+.addReg(ParityRegister);
+break;
+ }
case AMDGPU::S_SUB_I32: {
Register NegatedVal = MRI.createVirtualRegister(DstRegClass);
// Take the negation of the source operand.
-auto InvertedValReg =
-BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), NegatedVal)
-.addImm(-1)
-.addReg(SrcReg);
+BuildMI(BB, MI, DL, TII->get(AMDGPU::S_SUB_I32), NegatedVal)
+.addImm(0)
+.addReg(SrcReg);
BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
-.addReg(InvertedValReg->getOperand(0).getReg())
+.addReg(NegatedVal)
.addReg(NewAccumulator->getOperand(0).getReg());
break;
}
@@ -5207,6 +5212,74 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
.addReg(NewAccumulator->getOperand(0).getReg());
break;
}
+ case AMDGPU::S_ADD_U64_PSEUDO:
+ case AMDGPU::S_SUB_U64_PSEUDO: {
+Register Des
[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 3 (PR #151310)
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Aaditya (easyonaadit)
Changes
Supporting Arithmetic Operations: `and`, `or`, `xor`
---
Patch is 146.75 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/151310.diff
5 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+72-6)
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll (+854)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll (+855)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll (+1413)
``diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 56d8e739b6493..c8a0372aa0f8a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5111,9 +5111,12 @@ static uint32_t
getIdentityValueForWaveReduction(unsigned Opc) {
case AMDGPU::S_SUB_I32:
case AMDGPU::S_SUB_U64_PSEUDO:
case AMDGPU::S_OR_B32:
+ case AMDGPU::S_OR_B64:
case AMDGPU::S_XOR_B32:
+ case AMDGPU::S_XOR_B64:
return std::numeric_limits::min();
case AMDGPU::S_AND_B32:
+ case AMDGPU::S_AND_B64:
return std::numeric_limits::max();
default:
llvm_unreachable("Unexpected opcode in getIdentityValueForWaveReduction");
@@ -5146,7 +5149,9 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
case AMDGPU::S_MAX_I32:
case AMDGPU::V_CMP_GT_I64_e64: /*max*/
case AMDGPU::S_AND_B32:
-case AMDGPU::S_OR_B32: {
+case AMDGPU::S_AND_B64:
+case AMDGPU::S_OR_B32:
+case AMDGPU::S_OR_B64: {
// Idempotent operations.
unsigned movOpc = is32BitOpc ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(BB, MI, DL, TII->get(movOpc), DstReg).addReg(SrcReg);
@@ -5154,6 +5159,7 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
break;
}
case AMDGPU::S_XOR_B32:
+case AMDGPU::S_XOR_B64:
case AMDGPU::S_ADD_I32:
case AMDGPU::S_ADD_U64_PSEUDO:
case AMDGPU::S_SUB_I32:
@@ -5177,7 +5183,8 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
.addReg(ExecMask);
switch (Opc) {
- case AMDGPU::S_XOR_B32: {
+ case AMDGPU::S_XOR_B32:
+ case AMDGPU::S_XOR_B64: {
// Performing an XOR operation on a uniform value
// depends on the parity of the number of active lanes.
// For even parity, the result will be 0, for odd
@@ -5189,10 +5196,54 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr
&MI,
.addReg(NewAccumulator->getOperand(0).getReg())
.addImm(1)
.setOperandDead(3); // Dead scc
-BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
-.addReg(SrcReg)
-.addReg(ParityRegister);
-break;
+if (is32BitOpc) {
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
+ .addReg(SrcReg)
+ .addReg(ParityRegister);
+ break;
+} else {
+ Register DestSub0 =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register DestSub1 =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register Op1H_Op0L_Reg =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register CarryReg =
+ MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+ const TargetRegisterClass *SrcSubRC =
+ TRI->getSubRegisterClass(SrcRC, AMDGPU::sub0);
+
+ MachineOperand Op1L = TII->buildExtractSubRegOrImm(
+ MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub0, SrcSubRC);
+ MachineOperand Op1H = TII->buildExtractSubRegOrImm(
+ MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub1, SrcSubRC);
+
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DestSub0)
+ .add(Op1L)
+ .addReg(ParityRegister);
+
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), Op1H_Op0L_Reg)
+ .add(Op1H)
+ .addReg(ParityRegister);
+
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_HI_U32), CarryReg)
+ .add(Op1L)
+ .addReg(ParityRegister);
+
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_ADD_U32), DestSub1)
+ .addReg(CarryReg)
+ .addReg(Op1H_Op0L_Reg)
+ .setOperandDead(3); // Dead scc
+
+ BuildMI(BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), DstReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU
[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 3 (PR #151310)
https://github.com/easyonaadit ready_for_review https://github.com/llvm/llvm-project/pull/151310 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 2 (PR #151309)
https://github.com/easyonaadit ready_for_review https://github.com/llvm/llvm-project/pull/151309 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Enable scalable vectorization of linalg.unpack (PR #149293)
banach-space wrote:
**UPDATE: 30/7/25**
* This
[commit](https://github.com/llvm/llvm-project/pull/149293/commits/56108b1df69e150c475adc58880ca7dce5355b21)
addresses the remaining comments from @hanhanW .
* I have rebased this PR on top of
https://github.com/llvm/llvm-project/pull/151334. This rebase addresses this
[comment](https://github.com/llvm/llvm-project/pull/149293#discussion_r2237499014)
from @egebeysel .
**GENERAL OBSERVATIONS + FUTURE STEPS**
Having implemented #151334, I now realise that we don't require separate vector
sizes for the _write_ operation (there's a small twist though).
To illustrate, take this example:
```mlir
func.func @example(%source: tensor<8x4x16x16xf32>, %dest: tensor<64x127xf32>)
-> tensor<64x127xf32> {
%0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32>
return %0 : tensor<64x127xf32>
}
```
It will be vectorized as:
```mlir
func.func @example(%arg0: tensor<8x4x16x16xf32>, %arg1: tensor<64x127xf32>)
-> tensor<64x127xf32> {
%cst = arith.constant 0.00e+00 : f32
%c0 = arith.constant 0 : index
// This is key - vec Op 1 !!!
%0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %cst {in_bounds =
[true, true, true, true]} : tensor<8x4x16x16xf32>, vector<8x4x16x16xf32>
// This is key - vec Op 2 !!!
%1 = vector.transpose %0, [1, 2, 0, 3] : vector<8x4x16x16xf32> to
vector<4x16x8x16xf32>
// This is key - vec Op 3 !!!
%2 = vector.shape_cast %1 : vector<4x16x8x16xf32> to vector<64x128xf32>
%c0_0 = arith.constant 0 : index
// This is key - vec Op 4!!!
%3 = vector.transfer_write %2, %arg1[%c0_0, %c0_0] {in_bounds = [true,
false]} : vector<64x128xf32>, tensor<64x127xf32>
return %3 : tensor<64x127xf32>
}
```
Now, once we vectorize the read operation, the remaining sizes are already
pre-determined (i.e. the sizes for the _write_ operation):
* For `vector.transpose`, the sizes must match the sizes from
`vector.transfer_read` (modulo the permutation).
* For `vector.shape_cast`, the input must match the output of
`vector.transpose`. The output is uniquely determined by e.g. applying
`outer_dims_perm` from `linalg.unpack` to the output from `vector.transpose`.
* For `vector.transfer_write`, we have to use the output shape from
`vector.shape_cast`.
TL;DR: We should only require sizes for the _read_ operation.
**TWIST**
While we should be able to infer the scalable flags, there is some logic still
missing. This should not be a problem though.
**NEXT STEPS**
While we could land this as is (IREE integration looks fine:
https://github.com/iree-org/iree/pull/21514, thanks @hanhanW ) and then iterate
in-tree, it might be "healthier" if there's one self-contained change.
Let me refine this and then integrate into IREE (to make sure that the
integration works). Also, @hanhanW , let's sync offline and make sure that
switching to "only vector sizes for the read Op" is going to work for IREE.
WDYT?
https://github.com/llvm/llvm-project/pull/149293
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][linalg] Enable scalable vectorization of linalg.unpack (PR #149293)
https://github.com/banach-space edited https://github.com/llvm/llvm-project/pull/149293 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy] [COFF] Ignore associative sections in executables (#151143) (PR #151336)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/151336
Backport fcbbcffd2e6ea30097809ba0cd1e3b6003fa862f
Requested by: @mstorsjo
>From b2b1c3d83951ebe6665314f1d10bb38077d01912 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?=
Date: Wed, 30 Jul 2025 15:39:04 +0200
Subject: [PATCH] [llvm-objcopy] [COFF] Ignore associative sections in
executables (#151143)
COFF associative sections are a feature where relocatable object files
can have section snippets marked as related to another section snippet,
so they are kept or discarded in relation to that other section snippet.
When llvm-objcopy removes sections, it also removes sections that are
marked as associative to the removed section (as the associative
sections otherwise would end up orphaned).
In a linked executable module (EXE or DLL), section associativity is
meaningless - thus, we should ignore those fields from the input.
After linking, GNU ld keeps the SectionDefinition auxiliary part of
symbols intact as it was in the source object file, which means that it
references section numbers in the source object files.
This fixes https://github.com/llvm/llvm-project/issues/53433.
(cherry picked from commit fcbbcffd2e6ea30097809ba0cd1e3b6003fa862f)
---
llvm/lib/ObjCopy/COFF/COFFReader.cpp | 2 +-
.../llvm-objcopy/COFF/exe-bogus-assoc.test| 134 ++
2 files changed, 135 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
diff --git a/llvm/lib/ObjCopy/COFF/COFFReader.cpp
b/llvm/lib/ObjCopy/COFF/COFFReader.cpp
index 62a71d41ded5f..9b55f76e58404 100644
--- a/llvm/lib/ObjCopy/COFF/COFFReader.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFReader.cpp
@@ -135,7 +135,7 @@ Error COFFReader::readSymbols(Object &Obj, bool IsBigObj)
const {
// it is, find the target section unique id.
const coff_aux_section_definition *SD = SymRef.getSectionDefinition();
const coff_aux_weak_external *WE = SymRef.getWeakExternal();
-if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && !Obj.IsPE) {
int32_t Index = SD->getNumber(IsBigObj);
if (Index <= 0 || static_cast(Index - 1) >= Sections.size())
return createStringError(object_error::parse_failed,
diff --git a/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
b/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
new file mode 100644
index 0..12f14b5d58e1c
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
@@ -0,0 +1,134 @@
+## Test that bogus associative section symbols in executables are ignored.
+##
+## The executable contains two (bogus) associative section symbols, both for
+## (parts of) the .rdata section; one pointing at the .debug_info section
+## (which will be stripped out) and one pointing at a nonexistent section.
+##
+## Check that stripping does succeed, and that it doesn't end up removing
+## the .rdata section.
+
+# RUN: yaml2obj %s -o %t.in.exe
+
+# RUN: llvm-strip --strip-debug %t.in.exe -o %t.out.exe
+# RUN: llvm-readobj --sections %t.out.exe | FileCheck %s
+
+# CHECK: Name: .rdata
+
+--- !COFF
+OptionalHeader:
+ AddressOfEntryPoint: 4096
+ ImageBase: 5368709120
+ SectionAlignment: 4096
+ FileAlignment: 512
+ MajorOperatingSystemVersion: 4
+ MinorOperatingSystemVersion: 0
+ MajorImageVersion: 0
+ MinorImageVersion: 0
+ MajorSubsystemVersion: 5
+ MinorSubsystemVersion: 2
+ Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI
+ DLLCharacteristics: [ ]
+ SizeOfStackReserve: 2097152
+ SizeOfStackCommit: 4096
+ SizeOfHeapReserve: 1048576
+ SizeOfHeapCommit: 4096
+header:
+ Machine: IMAGE_FILE_MACHINE_AMD64
+ Characteristics: [ ]
+sections:
+ - Name:.text
+Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE,
IMAGE_SCN_MEM_READ ]
+VirtualAddress: 4096
+VirtualSize: 48
+SectionData:
E80600E80200C3C3C30F1F00
+SizeOfRawData: 512
+ - Name:.rdata
+Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+VirtualAddress: 8192
+VirtualSize: 4
+SectionData: ''
+SizeOfRawData: 512
+ - Name:.debug_info
+Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA,
IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ]
+VirtualAddress: 16384
+VirtualSize: 4
+SectionData: ''
+SizeOfRawData: 512
+symbols:
+ - Name:.text
+Value: 0
+SectionNumber: 1
+SimpleType: IMAGE_SYM_TYPE_NULL
+ComplexType: IMAGE_SYM_DTYPE_NULL
+StorageClass:IMAGE_SYM_CLASS_STATIC
+SectionDefinition:
+ Length: 11
+ NumberOfRelocations: 2
+ NumberOfLinenumbers: 0
+ CheckSum:1703692295
+ Number: 1
+ - Name:'.te
[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy] [COFF] Ignore associative sections in executables (#151143) (PR #151336)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/151336 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy] [COFF] Ignore associative sections in executables (#151143) (PR #151336)
llvmbot wrote: @cjacek What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/151336 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy] [COFF] Ignore associative sections in executables (#151143) (PR #151336)
llvmbot wrote:
@llvm/pr-subscribers-llvm-binary-utilities
Author: None (llvmbot)
Changes
Backport fcbbcffd2e6ea30097809ba0cd1e3b6003fa862f
Requested by: @mstorsjo
---
Full diff: https://github.com/llvm/llvm-project/pull/151336.diff
2 Files Affected:
- (modified) llvm/lib/ObjCopy/COFF/COFFReader.cpp (+1-1)
- (added) llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test (+134)
``diff
diff --git a/llvm/lib/ObjCopy/COFF/COFFReader.cpp
b/llvm/lib/ObjCopy/COFF/COFFReader.cpp
index 62a71d41ded5f..9b55f76e58404 100644
--- a/llvm/lib/ObjCopy/COFF/COFFReader.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFReader.cpp
@@ -135,7 +135,7 @@ Error COFFReader::readSymbols(Object &Obj, bool IsBigObj)
const {
// it is, find the target section unique id.
const coff_aux_section_definition *SD = SymRef.getSectionDefinition();
const coff_aux_weak_external *WE = SymRef.getWeakExternal();
-if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && !Obj.IsPE) {
int32_t Index = SD->getNumber(IsBigObj);
if (Index <= 0 || static_cast(Index - 1) >= Sections.size())
return createStringError(object_error::parse_failed,
diff --git a/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
b/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
new file mode 100644
index 0..12f14b5d58e1c
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
@@ -0,0 +1,134 @@
+## Test that bogus associative section symbols in executables are ignored.
+##
+## The executable contains two (bogus) associative section symbols, both for
+## (parts of) the .rdata section; one pointing at the .debug_info section
+## (which will be stripped out) and one pointing at a nonexistent section.
+##
+## Check that stripping does succeed, and that it doesn't end up removing
+## the .rdata section.
+
+# RUN: yaml2obj %s -o %t.in.exe
+
+# RUN: llvm-strip --strip-debug %t.in.exe -o %t.out.exe
+# RUN: llvm-readobj --sections %t.out.exe | FileCheck %s
+
+# CHECK: Name: .rdata
+
+--- !COFF
+OptionalHeader:
+ AddressOfEntryPoint: 4096
+ ImageBase: 5368709120
+ SectionAlignment: 4096
+ FileAlignment: 512
+ MajorOperatingSystemVersion: 4
+ MinorOperatingSystemVersion: 0
+ MajorImageVersion: 0
+ MinorImageVersion: 0
+ MajorSubsystemVersion: 5
+ MinorSubsystemVersion: 2
+ Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI
+ DLLCharacteristics: [ ]
+ SizeOfStackReserve: 2097152
+ SizeOfStackCommit: 4096
+ SizeOfHeapReserve: 1048576
+ SizeOfHeapCommit: 4096
+header:
+ Machine: IMAGE_FILE_MACHINE_AMD64
+ Characteristics: [ ]
+sections:
+ - Name:.text
+Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE,
IMAGE_SCN_MEM_READ ]
+VirtualAddress: 4096
+VirtualSize: 48
+SectionData:
E80600E80200C3C3C30F1F00
+SizeOfRawData: 512
+ - Name:.rdata
+Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+VirtualAddress: 8192
+VirtualSize: 4
+SectionData: ''
+SizeOfRawData: 512
+ - Name:.debug_info
+Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA,
IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ]
+VirtualAddress: 16384
+VirtualSize: 4
+SectionData: ''
+SizeOfRawData: 512
+symbols:
+ - Name:.text
+Value: 0
+SectionNumber: 1
+SimpleType: IMAGE_SYM_TYPE_NULL
+ComplexType: IMAGE_SYM_DTYPE_NULL
+StorageClass:IMAGE_SYM_CLASS_STATIC
+SectionDefinition:
+ Length: 11
+ NumberOfRelocations: 2
+ NumberOfLinenumbers: 0
+ CheckSum:1703692295
+ Number: 1
+ - Name:'.text$func1'
+Value: 11
+SectionNumber: 1
+SimpleType: IMAGE_SYM_TYPE_NULL
+ComplexType: IMAGE_SYM_DTYPE_NULL
+StorageClass:IMAGE_SYM_CLASS_STATIC
+SectionDefinition:
+ Length: 1
+ NumberOfRelocations: 0
+ NumberOfLinenumbers: 0
+ CheckSum:40735498
+ Number: 3
+ Selection: IMAGE_COMDAT_SELECT_ANY
+ - Name:.rdata
+Value: 0
+SectionNumber: 2
+SimpleType: IMAGE_SYM_TYPE_NULL
+ComplexType: IMAGE_SYM_DTYPE_NULL
+StorageClass:IMAGE_SYM_CLASS_STATIC
+SectionDefinition:
+ Length: 1
+ NumberOfRelocations: 0
+ NumberOfLinenumbers: 0
+ CheckSum:0
+ Number: 3
+ Selection: IMAGE_COMDAT_SELECT_ASSOCIATIVE
+ - Name:'.text$func2'
+Value: 12
+SectionNumber: 1
+SimpleType: IMAGE_SYM_TYPE_NULL
+ComplexType: IMAGE_SYM_DTYPE_NULL
+StorageClass:IMAGE_SYM_CLASS_STATIC
+SectionDefinition:
+ Length: 1
+ NumberOfRel
[llvm-branch-commits] [lldb] release/21.x: [lldb][AArch64][Linux] Show MTE store only setting in mte_ctrl (#145033) (PR #151111)
omjavaid wrote: > @omjavaid @omjavaid What do you think about merging this PR to the release > branch? This looks good to make it into LLVM 21. +1 from my side. https://github.com/llvm/llvm-project/pull/151111 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor atan2f128 implementation to header-only in src/__support/math folder. (PR #151012)
https://github.com/bassiounix updated
https://github.com/llvm/llvm-project/pull/151012
>From a4bd4ed9b3ce4b833cad7421816ff03fb7df9fab Mon Sep 17 00:00:00 2001
From: bassiounix
Date: Mon, 28 Jul 2025 21:14:48 +0300
Subject: [PATCH 1/2] [libc][math] Refactor atan2f128 implementation to
header-only in src/__support/math folder.
---
libc/shared/math.h| 1 +
libc/shared/math/atan2f128.h | 29 +++
libc/src/__support/math/CMakeLists.txt| 15 ++
libc/src/__support/math/atan2f128.h | 212 ++
libc/src/math/generic/CMakeLists.txt | 10 +-
libc/src/math/generic/atan2f128.cpp | 190 +---
libc/test/shared/shared_math_test.cpp | 2 +
.../llvm-project-overlay/libc/BUILD.bazel | 24 +-
8 files changed, 284 insertions(+), 199 deletions(-)
create mode 100644 libc/shared/math/atan2f128.h
create mode 100644 libc/src/__support/math/atan2f128.h
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 527bb8d6214ae..6cb583c08dedd 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -25,6 +25,7 @@
#include "math/atan.h"
#include "math/atan2.h"
#include "math/atan2f.h"
+#include "math/atan2f128.h"
#include "math/atanf.h"
#include "math/atanf16.h"
#include "math/erff.h"
diff --git a/libc/shared/math/atan2f128.h b/libc/shared/math/atan2f128.h
new file mode 100644
index 0..d7aee40c69527
--- /dev/null
+++ b/libc/shared/math/atan2f128.h
@@ -0,0 +1,29 @@
+//===-- Shared atan2f128 function ---*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F128_H
+#define LLVM_LIBC_SHARED_MATH_ATAN2F128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atan2f128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atan2f128;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+
+#endif // LLVM_LIBC_SHARED_MATH_ATAN2F128_H
diff --git a/libc/src/__support/math/CMakeLists.txt
b/libc/src/__support/math/CMakeLists.txt
index c197b19ed29de..caafdc2cbf1d6 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -230,6 +230,21 @@ add_header_library(
libc.src.__support.macros.optimization
)
+add_header_library(
+ atan2f128
+ HDRS
+atan2f128.h
+ DEPENDS
+.atan_utils
+libc.src.__support.integer_literals
+libc.src.__support.uint128
+libc.src.__support.FPUtil.dyadic_float
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.multiply_add
+libc.src.__support.FPUtil.nearest_integer
+libc.src.__support.macros.optimization
+)
+
add_header_library(
atanf
HDRS
diff --git a/libc/src/__support/math/atan2f128.h
b/libc/src/__support/math/atan2f128.h
new file mode 100644
index 0..89efaf1fd72a0
--- /dev/null
+++ b/libc/src/__support/math/atan2f128.h
@@ -0,0 +1,212 @@
+//===-- Implementation header for atan2f128 -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "atan_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/integer_literals.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/uint128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+// There are several range reduction steps we can take for atan2(y, x) as
+// follow:
+
+// * Range reduction 1: signness
+// atan2(y, x) will return a number between -PI and PI representing the angle
+// forming by the 0x axis and the vector (x, y) on the 0xy-plane.
+// In particular, we have that:
+// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant)
+// = pi + atan( y/x )if x < 0 and y >= 0 (II-quadrant)
+// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant)
+// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant)
+// Since atan function is odd, we can use the formula:
+// atan(-u) = -atan(u)
+// to adjust the a
[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor atan2f implementation to header-only in src/__support/math folder. (PR #150993)
https://github.com/bassiounix updated
https://github.com/llvm/llvm-project/pull/150993
>From 37d0403d9fbb96d117cc8ce90cdee667ee9f86b2 Mon Sep 17 00:00:00 2001
From: bassiounix
Date: Mon, 28 Jul 2025 19:35:03 +0300
Subject: [PATCH] [libc][math] Refactor atan2f implementation to header-only in
src/__support/math folder.
---
libc/shared/math.h| 1 +
libc/shared/math/atan2f.h | 23 ++
libc/src/__support/math/CMakeLists.txt| 17 +
libc/src/__support/math/atan2f.h | 351 ++
.../generic => __support/math}/atan2f_float.h | 21 +-
libc/src/math/generic/CMakeLists.txt | 12 +-
libc/src/math/generic/atan2f.cpp | 328 +---
libc/test/shared/CMakeLists.txt | 1 +
libc/test/shared/shared_math_test.cpp | 1 +
.../llvm-project-overlay/libc/BUILD.bazel | 20 +-
10 files changed, 427 insertions(+), 348 deletions(-)
create mode 100644 libc/shared/math/atan2f.h
create mode 100644 libc/src/__support/math/atan2f.h
rename libc/src/{math/generic => __support/math}/atan2f_float.h (95%)
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 0605d918eb2af..527bb8d6214ae 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -24,6 +24,7 @@
#include "math/asinhf16.h"
#include "math/atan.h"
#include "math/atan2.h"
+#include "math/atan2f.h"
#include "math/atanf.h"
#include "math/atanf16.h"
#include "math/erff.h"
diff --git a/libc/shared/math/atan2f.h b/libc/shared/math/atan2f.h
new file mode 100644
index 0..2de09d25e19f8
--- /dev/null
+++ b/libc/shared/math/atan2f.h
@@ -0,0 +1,23 @@
+//===-- Shared atan2f function --*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F_H
+#define LLVM_LIBC_SHARED_MATH_ATAN2F_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atan2f.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atan2f;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ATAN2F_H
diff --git a/libc/src/__support/math/CMakeLists.txt
b/libc/src/__support/math/CMakeLists.txt
index bbb07b62552f6..c197b19ed29de 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -213,6 +213,23 @@ add_header_library(
libc.src.__support.macros.optimization
)
+add_header_library(
+ atan2f
+ HDRS
+atan2f_float.h
+atan2f.h
+ DEPENDS
+.inv_trigf_utils
+libc.src.__support.FPUtil.double_double
+libc.src.__support.FPUtil.fenv_impl
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.multiply_add
+libc.src.__support.FPUtil.nearest_integer
+libc.src.__support.FPUtil.polyeval
+libc.src.__support.macros.config
+libc.src.__support.macros.optimization
+)
+
add_header_library(
atanf
HDRS
diff --git a/libc/src/__support/math/atan2f.h b/libc/src/__support/math/atan2f.h
new file mode 100644
index 0..e3b19329126f4
--- /dev/null
+++ b/libc/src/__support/math/atan2f.h
@@ -0,0 +1,351 @@
+//===-- Implementation header for atan2f *- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
+
+#include "inv_trigf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) &&
\
+defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT)
+
+// We use float-float implementation to reduce size.
+#include "atan2f_float.h"
+
+#else
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace atan2f_internal {
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// Look up tables for accurate pass:
+
+// atan(i/16) with i = 0..16, generated by Sollya with:
+// > for i from 0 to 16 do {
+// a = round(atan(i/16), D, RN);
+// b = round(atan(i/16) - a, D, RN);
+// print("{", b, ",", a, "},");
+// };
+static constexpr fputil::DoubleDouble ATAN_I[17] = {
+{0.0, 0.0},
+{-0x1.c934d86d23
[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor atan2f128 implementation to header-only in src/__support/math folder. (PR #151012)
https://github.com/bassiounix updated
https://github.com/llvm/llvm-project/pull/151012
>From a4bd4ed9b3ce4b833cad7421816ff03fb7df9fab Mon Sep 17 00:00:00 2001
From: bassiounix
Date: Mon, 28 Jul 2025 21:14:48 +0300
Subject: [PATCH 1/2] [libc][math] Refactor atan2f128 implementation to
header-only in src/__support/math folder.
---
libc/shared/math.h| 1 +
libc/shared/math/atan2f128.h | 29 +++
libc/src/__support/math/CMakeLists.txt| 15 ++
libc/src/__support/math/atan2f128.h | 212 ++
libc/src/math/generic/CMakeLists.txt | 10 +-
libc/src/math/generic/atan2f128.cpp | 190 +---
libc/test/shared/shared_math_test.cpp | 2 +
.../llvm-project-overlay/libc/BUILD.bazel | 24 +-
8 files changed, 284 insertions(+), 199 deletions(-)
create mode 100644 libc/shared/math/atan2f128.h
create mode 100644 libc/src/__support/math/atan2f128.h
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 527bb8d6214ae..6cb583c08dedd 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -25,6 +25,7 @@
#include "math/atan.h"
#include "math/atan2.h"
#include "math/atan2f.h"
+#include "math/atan2f128.h"
#include "math/atanf.h"
#include "math/atanf16.h"
#include "math/erff.h"
diff --git a/libc/shared/math/atan2f128.h b/libc/shared/math/atan2f128.h
new file mode 100644
index 0..d7aee40c69527
--- /dev/null
+++ b/libc/shared/math/atan2f128.h
@@ -0,0 +1,29 @@
+//===-- Shared atan2f128 function ---*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F128_H
+#define LLVM_LIBC_SHARED_MATH_ATAN2F128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atan2f128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atan2f128;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+
+#endif // LLVM_LIBC_SHARED_MATH_ATAN2F128_H
diff --git a/libc/src/__support/math/CMakeLists.txt
b/libc/src/__support/math/CMakeLists.txt
index c197b19ed29de..caafdc2cbf1d6 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -230,6 +230,21 @@ add_header_library(
libc.src.__support.macros.optimization
)
+add_header_library(
+ atan2f128
+ HDRS
+atan2f128.h
+ DEPENDS
+.atan_utils
+libc.src.__support.integer_literals
+libc.src.__support.uint128
+libc.src.__support.FPUtil.dyadic_float
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.multiply_add
+libc.src.__support.FPUtil.nearest_integer
+libc.src.__support.macros.optimization
+)
+
add_header_library(
atanf
HDRS
diff --git a/libc/src/__support/math/atan2f128.h
b/libc/src/__support/math/atan2f128.h
new file mode 100644
index 0..89efaf1fd72a0
--- /dev/null
+++ b/libc/src/__support/math/atan2f128.h
@@ -0,0 +1,212 @@
+//===-- Implementation header for atan2f128 -*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "atan_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/integer_literals.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/uint128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+// There are several range reduction steps we can take for atan2(y, x) as
+// follow:
+
+// * Range reduction 1: signness
+// atan2(y, x) will return a number between -PI and PI representing the angle
+// forming by the 0x axis and the vector (x, y) on the 0xy-plane.
+// In particular, we have that:
+// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant)
+// = pi + atan( y/x )if x < 0 and y >= 0 (II-quadrant)
+// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant)
+// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant)
+// Since atan function is odd, we can use the formula:
+// atan(-u) = -atan(u)
+// to adjust the a
[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor atan2f implementation to header-only in src/__support/math folder. (PR #150993)
https://github.com/bassiounix updated
https://github.com/llvm/llvm-project/pull/150993
>From 37d0403d9fbb96d117cc8ce90cdee667ee9f86b2 Mon Sep 17 00:00:00 2001
From: bassiounix
Date: Mon, 28 Jul 2025 19:35:03 +0300
Subject: [PATCH] [libc][math] Refactor atan2f implementation to header-only in
src/__support/math folder.
---
libc/shared/math.h| 1 +
libc/shared/math/atan2f.h | 23 ++
libc/src/__support/math/CMakeLists.txt| 17 +
libc/src/__support/math/atan2f.h | 351 ++
.../generic => __support/math}/atan2f_float.h | 21 +-
libc/src/math/generic/CMakeLists.txt | 12 +-
libc/src/math/generic/atan2f.cpp | 328 +---
libc/test/shared/CMakeLists.txt | 1 +
libc/test/shared/shared_math_test.cpp | 1 +
.../llvm-project-overlay/libc/BUILD.bazel | 20 +-
10 files changed, 427 insertions(+), 348 deletions(-)
create mode 100644 libc/shared/math/atan2f.h
create mode 100644 libc/src/__support/math/atan2f.h
rename libc/src/{math/generic => __support/math}/atan2f_float.h (95%)
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 0605d918eb2af..527bb8d6214ae 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -24,6 +24,7 @@
#include "math/asinhf16.h"
#include "math/atan.h"
#include "math/atan2.h"
+#include "math/atan2f.h"
#include "math/atanf.h"
#include "math/atanf16.h"
#include "math/erff.h"
diff --git a/libc/shared/math/atan2f.h b/libc/shared/math/atan2f.h
new file mode 100644
index 0..2de09d25e19f8
--- /dev/null
+++ b/libc/shared/math/atan2f.h
@@ -0,0 +1,23 @@
+//===-- Shared atan2f function --*- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F_H
+#define LLVM_LIBC_SHARED_MATH_ATAN2F_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atan2f.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atan2f;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ATAN2F_H
diff --git a/libc/src/__support/math/CMakeLists.txt
b/libc/src/__support/math/CMakeLists.txt
index bbb07b62552f6..c197b19ed29de 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -213,6 +213,23 @@ add_header_library(
libc.src.__support.macros.optimization
)
+add_header_library(
+ atan2f
+ HDRS
+atan2f_float.h
+atan2f.h
+ DEPENDS
+.inv_trigf_utils
+libc.src.__support.FPUtil.double_double
+libc.src.__support.FPUtil.fenv_impl
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.multiply_add
+libc.src.__support.FPUtil.nearest_integer
+libc.src.__support.FPUtil.polyeval
+libc.src.__support.macros.config
+libc.src.__support.macros.optimization
+)
+
add_header_library(
atanf
HDRS
diff --git a/libc/src/__support/math/atan2f.h b/libc/src/__support/math/atan2f.h
new file mode 100644
index 0..e3b19329126f4
--- /dev/null
+++ b/libc/src/__support/math/atan2f.h
@@ -0,0 +1,351 @@
+//===-- Implementation header for atan2f *- C++
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
+
+#include "inv_trigf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) &&
\
+defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT)
+
+// We use float-float implementation to reduce size.
+#include "atan2f_float.h"
+
+#else
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace atan2f_internal {
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// Look up tables for accurate pass:
+
+// atan(i/16) with i = 0..16, generated by Sollya with:
+// > for i from 0 to 16 do {
+// a = round(atan(i/16), D, RN);
+// b = round(atan(i/16) - a, D, RN);
+// print("{", b, ",", a, "},");
+// };
+static constexpr fputil::DoubleDouble ATAN_I[17] = {
+{0.0, 0.0},
+{-0x1.c934d86d23
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (PR #151415)
llvmbot wrote:
@llvm/pr-subscribers-mc
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
Changes
---
Patch is 122.29 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/151415.diff
28 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+4)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+138)
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+24)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+23-1)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+1)
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+51)
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+16)
- (added) llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll (+28)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll (+539)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s (+145)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s (+145)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s (+25)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt (+167-3)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt (+64)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt (+64)
```diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index ec00fadf3039a..172ac467f7cad 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -702,6 +702,10 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc",
"gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc",
"gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc",
"gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 2595442ba7f9e..1c67fc3879bff 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -398,6 +398,144 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
}
+// CHECK-LABEL: @test_cvt_pk_bf8_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8,
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[OUT_ADDR]] to ptr
+// CHECK-NEXT:[[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[A_ADDR]] to ptr
+// CHECK-NEXT:store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]],
align 8
+// CHECK-NEXT:store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align
4
+// CHECK-NEXT:[[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x
half> [[TMP0]])
+// CHECK-NEXT:[[TMP2:%.*]] = load ptr addrspace(1), ptr
[[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT:ret void
+//
+void test_cvt_pk_bf8_f16(global short* out, half2 a)
+{
+ *out = __builtin_amdgcn_cvt_pk_bf8_f16(a);
+}
+
+// CHECK-LABEL: @test_cvt_pk_fp8_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8,
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]]
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (PR #151415)
llvmbot wrote:
@llvm/pr-subscribers-llvm-ir
Author: Stanislav Mekhanoshin (rampitec)
Changes
---
Patch is 122.29 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/151415.diff
28 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+4)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+138)
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+24)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+23-1)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+1)
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+51)
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+16)
- (added) llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll (+28)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll (+539)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s (+145)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s (+145)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s (+25)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt (+167-3)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt (+64)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt (+64)
```diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index ec00fadf3039a..172ac467f7cad 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -702,6 +702,10 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc",
"gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc",
"gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc",
"gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 2595442ba7f9e..1c67fc3879bff 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -398,6 +398,144 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
}
+// CHECK-LABEL: @test_cvt_pk_bf8_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8,
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[OUT_ADDR]] to ptr
+// CHECK-NEXT:[[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[A_ADDR]] to ptr
+// CHECK-NEXT:store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]],
align 8
+// CHECK-NEXT:store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align
4
+// CHECK-NEXT:[[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x
half> [[TMP0]])
+// CHECK-NEXT:[[TMP2:%.*]] = load ptr addrspace(1), ptr
[[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT:ret void
+//
+void test_cvt_pk_bf8_f16(global short* out, half2 a)
+{
+ *out = __builtin_amdgcn_cvt_pk_bf8_f16(a);
+}
+
+// CHECK-LABEL: @test_cvt_pk_fp8_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8,
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (PR #151415)
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: Stanislav Mekhanoshin (rampitec)
Changes
---
Patch is 122.29 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/151415.diff
28 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+4)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+138)
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+24)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+4)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+23-1)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+1)
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+51)
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+16)
- (added) llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll (+28)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll (+539)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s (+145)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s (+145)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s (+64)
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s (+25)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt (+167-3)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt (+64)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt (+64)
```diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index ec00fadf3039a..172ac467f7cad 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -702,6 +702,10 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc",
"gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc",
"gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc",
"gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 2595442ba7f9e..1c67fc3879bff 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -398,6 +398,144 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
}
+// CHECK-LABEL: @test_cvt_pk_bf8_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8,
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[OUT_ADDR]] to ptr
+// CHECK-NEXT:[[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[A_ADDR]] to ptr
+// CHECK-NEXT:store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]],
align 8
+// CHECK-NEXT:store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align
4
+// CHECK-NEXT:[[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x
half> [[TMP0]])
+// CHECK-NEXT:[[TMP2:%.*]] = load ptr addrspace(1), ptr
[[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT:ret void
+//
+void test_cvt_pk_bf8_f16(global short* out, half2 a)
+{
+ *out = __builtin_amdgcn_cvt_pk_bf8_f16(a);
+}
+
+// CHECK-LABEL: @test_cvt_pk_fp8_f16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8,
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (PR #151415)
https://github.com/rampitec ready_for_review
https://github.com/llvm/llvm-project/pull/151415
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (PR #151415)
rampitec wrote:
> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/151415?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)
* **#151415** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/151415?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/151415?utm_source=stack-comment-view-in-graphite)
* **#151389** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/151389?utm_source=stack-comment-icon)
* **#151385** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/151385?utm_source=stack-comment-icon)
* **#151379** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/151379?utm_source=stack-comment-icon)
* `main`
This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment).
https://github.com/llvm/llvm-project/pull/151415
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [NFC] test/lit.cfg.py formatting (PR #151218)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/151218
>From ed3dd6b28f8182e078a9c4ed78a6293bfabfc92f Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 29 Jul 2025 13:01:10 -0700
Subject: [PATCH] [NFC] test/lit.cfg.py formatting
---
llvm/test/lit.cfg.py | 11 +--
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 143cc3817bd08..1d190fd20e573 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -451,7 +451,7 @@ def version_int(ver):
"%llvmdylib",
"{}/libLLVM{}.{}".format(
config.llvm_shlib_dir, config.llvm_shlib_ext,
config.llvm_dylib_version
-)
+),
)
)
@@ -582,6 +582,7 @@ def have_ld64_plugin_support():
if have_ld64_plugin_support():
config.available_features.add("ld64_plugin")
+
def host_unwind_supports_jit():
# Do we expect the host machine to support JIT registration of clang's
# default unwind info format for the host (e.g. eh-frames, compact-unwind,
@@ -589,7 +590,7 @@ def host_unwind_supports_jit():
# Linux and the BSDs use DWARF eh-frames and all known unwinders support
# register_frame at minimum.
-if platform.system() in [ "Linux", "FreeBSD", "NetBSD" ]:
+if platform.system() in ["Linux", "FreeBSD", "NetBSD"]:
return True
# Windows does not support frame info without the ORC runtime.
@@ -602,10 +603,7 @@ def host_unwind_supports_jit():
# macOS 14.0.
if platform.system() == "Darwin":
-assert (
-"arm64" in config.host_triple
-or "x86_64" in config.host_triple
-)
+assert "arm64" in config.host_triple or "x86_64" in config.host_triple
if "x86_64" in config.host_triple:
return True
@@ -627,6 +625,7 @@ def host_unwind_supports_jit():
return False
+
if host_unwind_supports_jit():
config.available_features.add("host-unwind-supports-jit")
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] Ignore FileCheck when profcheck is enabled (PR #151214)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/151214
>From 218c4bfd978b253c112f3910b4e5f768f2df49b0 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 29 Jul 2025 12:55:59 -0700
Subject: [PATCH] fixes
---
llvm/test/lit.cfg.py | 14 --
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 1d190fd20e573..43c7cf677a913 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -18,7 +18,17 @@ config.name = "LLVM"
# testFormat: The test format to use to interpret tests.
-config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
+extra_substitutions = extra_substitutions = (
+[
+(r"\| not FileCheck .*", "| tee /dev/null"),
+(r"\| FileCheck .*", "| tee /dev/null"),
+]
+if config.enable_profcheck
+else []
+)
+config.test_format = lit.formats.ShTest(
+not llvm_config.use_lit_shell, extra_substitutions
+)
# suffixes: A list of file extensions to treat as test files. This is overriden
# by individual lit.local.cfg files in the test subdirectories.
@@ -278,6 +288,7 @@ def get_asan_rtlib():
]
)
+
# Find (major, minor) version of ptxas
def ptxas_version(ptxas):
ptxas_cmd = subprocess.Popen([ptxas, "--version"], stdout=subprocess.PIPE)
@@ -602,7 +613,6 @@ def host_unwind_supports_jit():
# compact-unwind only, and JIT'd registration is not available before
# macOS 14.0.
if platform.system() == "Darwin":
-
assert "arm64" in config.host_triple or "x86_64" in config.host_triple
if "x86_64" in config.host_triple:
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
