[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)

2025-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/151307

Backport 75b79c9238bc083cdff2d2364be40633fdf4d1ad

Requested by: @e-kud

>From c325db48499ba6332b6d598db489ec1804d4506f Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov 
Date: Mon, 28 Jul 2025 17:45:16 +0200
Subject: [PATCH] [LLD][X86] Match delayLoad thunk with MSVC (#149521)

Previously we saved registers in the shadow space of callee before
calling __delayLoadHelper2. Now we save arguments in the shadow space of
the caller and allocate shadow space for the callee.

Fixes #51941

-

Co-authored-by: Benjamin Santerre 
(cherry picked from commit 75b79c9238bc083cdff2d2364be40633fdf4d1ad)
---
 lld/COFF/DLL.cpp   | 58 +++
 lld/test/COFF/arm64ec-delayimport.test | 48 ++---
 lld/test/COFF/arm64x-delayimport.test  | 98 +-
 lld/test/COFF/delayimports.test| 14 ++--
 lld/test/COFF/delayimporttables.yaml   |  8 +--
 lld/test/COFF/giats.s  |  4 +-
 6 files changed, 111 insertions(+), 119 deletions(-)

diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index c327da28ce138..3ce8853adb2a2 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -244,40 +244,36 @@ static const uint8_t thunkX64[] = {
 };
 
 static const uint8_t tailMergeX64[] = {
-0x51,   // pushrcx
-0x52,   // pushrdx
-0x41, 0x50, // pushr8
-0x41, 0x51, // pushr9
-0x48, 0x83, 0xEC, 0x48, // sub rsp, 48h
-0x66, 0x0F, 0x7F, 0x04, 0x24,   // movdqa  xmmword ptr [rsp], xmm0
-0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa  xmmword ptr [rsp+10h], xmm1
-0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa  xmmword ptr [rsp+20h], xmm2
-0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa  xmmword ptr [rsp+30h], xmm3
-0x48, 0x8B, 0xD0,   // mov rdx, rax
-0x48, 0x8D, 0x0D, 0, 0, 0, 0,   // lea rcx, [___DELAY_IMPORT_...]
-0xE8, 0, 0, 0, 0,   // call__delayLoadHelper2
-0x66, 0x0F, 0x6F, 0x04, 0x24,   // movdqa  xmm0, xmmword ptr [rsp]
-0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x10, // movdqa  xmm1, xmmword ptr [rsp+10h]
-0x66, 0x0F, 0x6F, 0x54, 0x24, 0x20, // movdqa  xmm2, xmmword ptr [rsp+20h]
-0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x30, // movdqa  xmm3, xmmword ptr [rsp+30h]
-0x48, 0x83, 0xC4, 0x48, // add rsp, 48h
-0x41, 0x59, // pop r9
-0x41, 0x58, // pop r8
-0x5A,   // pop rdx
-0x59,   // pop rcx
-0xFF, 0xE0, // jmp rax
+0x48, 0x89, 0x4C, 0x24, 0x08,  // movqword ptr [rsp+8], rcx
+0x48, 0x89, 0x54, 0x24, 0x10,  // movqword ptr [rsp+10h], rdx
+0x4C, 0x89, 0x44, 0x24, 0x18,  // movqword ptr [rsp+18h], r8
+0x4C, 0x89, 0x4C, 0x24, 0x20,  // movqword ptr [rsp+20h], r9
+0x48, 0x83, 0xEC, 0x68,// subrsp, 68h
+0x66, 0x0F, 0x7F, 0x44, 0x24, 0x20,// movdqa xmmword ptr [rsp+20h], 
xmm0
+0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x30,// movdqa xmmword ptr [rsp+30h], 
xmm1
+0x66, 0x0F, 0x7F, 0x54, 0x24, 0x40,// movdqa xmmword ptr [rsp+40h], 
xmm2
+0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x50,// movdqa xmmword ptr [rsp+50h], 
xmm3
+0x48, 0x8B, 0xD0,  // movrdx, rax
+0x48, 0x8D, 0x0D, 0, 0, 0, 0,  // learcx, [___DELAY_IMPORT_...]
+0xE8, 0, 0, 0, 0,  // call   __delayLoadHelper2
+0x66, 0x0F, 0x6F, 0x44, 0x24, 0x20,// movdqa xmm0, xmmword ptr 
[rsp+20h]
+0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x30,// movdqa xmm1, xmmword ptr 
[rsp+30h]
+0x66, 0x0F, 0x6F, 0x54, 0x24, 0x40,// movdqa xmm2, xmmword ptr 
[rsp+40h]
+0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x50,// movdqa xmm3, xmmword ptr 
[rsp+50h]
+0x48, 0x8B, 0x4C, 0x24, 0x70,  // movrcx, qword ptr [rsp+70h]
+0x48, 0x8B, 0x54, 0x24, 0x78,  // movrdx, qword ptr [rsp+78h]
+0x4C, 0x8B, 0x84, 0x24, 0x80, 0, 0, 0, // movr8, qword ptr [rsp+80h]
+0x4C, 0x8B, 0x8C, 0x24, 0x88, 0, 0, 0, // movr9, qword ptr [rsp+88h]
+0x48, 0x83, 0xC4, 0x68,// addrsp, 68h
+0xFF, 0xE0,// jmprax
 };
 
 static const uint8_t tailMergeUnwindInfoX64[] = {
 0x01,   // Version=1, Flags=UNW_FLAG_NHANDLER
-0x0a,   // Size of prolog
-0x05,   // Count of unwind codes
+0x18,   // Size of prolog
+0x01,   // Count of unwind codes
 0x00,   // No frame register
-0x0a, 0x82, // Offset 0xa: UWOP_ALLOC_SMALL(0x48)
-0x06, 0x02, // Offset 6: UWOP_ALLOC_SMALL(8)
-0x04, 0x02, // Offset 4: UWOP_ALLOC_SMALL(8)
-0x02, 0x02, // Offset 2: UWOP_ALLOC_SMALL(8)
-   

[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)

2025-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/151307
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:

@cjacek What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/151307
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lld

Author: None (llvmbot)


Changes

Backport 75b79c9238bc083cdff2d2364be40633fdf4d1ad

Requested by: @e-kud

---

Patch is 22.51 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/151307.diff


6 Files Affected:

- (modified) lld/COFF/DLL.cpp (+27-31) 
- (modified) lld/test/COFF/arm64ec-delayimport.test (+24-24) 
- (modified) lld/test/COFF/arm64x-delayimport.test (+49-49) 
- (modified) lld/test/COFF/delayimports.test (+5-9) 
- (modified) lld/test/COFF/delayimporttables.yaml (+4-4) 
- (modified) lld/test/COFF/giats.s (+2-2) 


```diff
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index c327da28ce138..3ce8853adb2a2 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -244,40 +244,36 @@ static const uint8_t thunkX64[] = {
 };
 
 static const uint8_t tailMergeX64[] = {
-0x51,   // pushrcx
-0x52,   // pushrdx
-0x41, 0x50, // pushr8
-0x41, 0x51, // pushr9
-0x48, 0x83, 0xEC, 0x48, // sub rsp, 48h
-0x66, 0x0F, 0x7F, 0x04, 0x24,   // movdqa  xmmword ptr [rsp], xmm0
-0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa  xmmword ptr [rsp+10h], xmm1
-0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa  xmmword ptr [rsp+20h], xmm2
-0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa  xmmword ptr [rsp+30h], xmm3
-0x48, 0x8B, 0xD0,   // mov rdx, rax
-0x48, 0x8D, 0x0D, 0, 0, 0, 0,   // lea rcx, [___DELAY_IMPORT_...]
-0xE8, 0, 0, 0, 0,   // call__delayLoadHelper2
-0x66, 0x0F, 0x6F, 0x04, 0x24,   // movdqa  xmm0, xmmword ptr [rsp]
-0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x10, // movdqa  xmm1, xmmword ptr [rsp+10h]
-0x66, 0x0F, 0x6F, 0x54, 0x24, 0x20, // movdqa  xmm2, xmmword ptr [rsp+20h]
-0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x30, // movdqa  xmm3, xmmword ptr [rsp+30h]
-0x48, 0x83, 0xC4, 0x48, // add rsp, 48h
-0x41, 0x59, // pop r9
-0x41, 0x58, // pop r8
-0x5A,   // pop rdx
-0x59,   // pop rcx
-0xFF, 0xE0, // jmp rax
+0x48, 0x89, 0x4C, 0x24, 0x08,  // movqword ptr [rsp+8], rcx
+0x48, 0x89, 0x54, 0x24, 0x10,  // movqword ptr [rsp+10h], rdx
+0x4C, 0x89, 0x44, 0x24, 0x18,  // movqword ptr [rsp+18h], r8
+0x4C, 0x89, 0x4C, 0x24, 0x20,  // movqword ptr [rsp+20h], r9
+0x48, 0x83, 0xEC, 0x68,// subrsp, 68h
+0x66, 0x0F, 0x7F, 0x44, 0x24, 0x20,// movdqa xmmword ptr [rsp+20h], 
xmm0
+0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x30,// movdqa xmmword ptr [rsp+30h], 
xmm1
+0x66, 0x0F, 0x7F, 0x54, 0x24, 0x40,// movdqa xmmword ptr [rsp+40h], 
xmm2
+0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x50,// movdqa xmmword ptr [rsp+50h], 
xmm3
+0x48, 0x8B, 0xD0,  // movrdx, rax
+0x48, 0x8D, 0x0D, 0, 0, 0, 0,  // learcx, [___DELAY_IMPORT_...]
+0xE8, 0, 0, 0, 0,  // call   __delayLoadHelper2
+0x66, 0x0F, 0x6F, 0x44, 0x24, 0x20,// movdqa xmm0, xmmword ptr 
[rsp+20h]
+0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x30,// movdqa xmm1, xmmword ptr 
[rsp+30h]
+0x66, 0x0F, 0x6F, 0x54, 0x24, 0x40,// movdqa xmm2, xmmword ptr 
[rsp+40h]
+0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x50,// movdqa xmm3, xmmword ptr 
[rsp+50h]
+0x48, 0x8B, 0x4C, 0x24, 0x70,  // movrcx, qword ptr [rsp+70h]
+0x48, 0x8B, 0x54, 0x24, 0x78,  // movrdx, qword ptr [rsp+78h]
+0x4C, 0x8B, 0x84, 0x24, 0x80, 0, 0, 0, // movr8, qword ptr [rsp+80h]
+0x4C, 0x8B, 0x8C, 0x24, 0x88, 0, 0, 0, // movr9, qword ptr [rsp+88h]
+0x48, 0x83, 0xC4, 0x68,// addrsp, 68h
+0xFF, 0xE0,// jmprax
 };
 
 static const uint8_t tailMergeUnwindInfoX64[] = {
 0x01,   // Version=1, Flags=UNW_FLAG_NHANDLER
-0x0a,   // Size of prolog
-0x05,   // Count of unwind codes
+0x18,   // Size of prolog
+0x01,   // Count of unwind codes
 0x00,   // No frame register
-0x0a, 0x82, // Offset 0xa: UWOP_ALLOC_SMALL(0x48)
-0x06, 0x02, // Offset 6: UWOP_ALLOC_SMALL(8)
-0x04, 0x02, // Offset 4: UWOP_ALLOC_SMALL(8)
-0x02, 0x02, // Offset 2: UWOP_ALLOC_SMALL(8)
-0x01, 0x02, // Offset 1: UWOP_ALLOC_SMALL(8)
+0x18, 0xC2, // Offset 0x18: UWOP_ALLOC_SMALL(0x68)
 0x00, 0x00  // Padding to align on 32-bits
 };
 
@@ -378,8 +374,8 @@ class TailMergeChunkX64 : public NonSectionCodeChunk {
 
   void writeTo(uint8_t *buf) const override {
 memcpy(buf, tailMergeX64, sizeof(tailMergeX64));
-write32le(buf + 39, desc->getRVA() - rva - 43);
-write32le(buf + 44, helper->getRVA() - rva - 48);
+write32le

[llvm-branch-commits] [llvm] release/21.x: [AArch64] Keep floating-point conversion in SIMD (#147707) (PR #151317)

2025-07-30 Thread Guy David via llvm-branch-commits

guy-david wrote:

This optimization is important for us to land in this release, since it affects 
internal workloads we care about. It's rather conservative and concerns a very 
specific pattern around floating-point conversions in the AArch64 backend. 
Safety is above all, so we should definitely wait for the more expensive 
bots to complete their verification.

https://github.com/llvm/llvm-project/pull/151317
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish edited 
https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish approved this pull request.

lgtm. It would be good to get an approval from llvm-profgen owners.

cc: @WenleiHe @wlei-llvm 

https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits


@@ -1027,6 +1027,20 @@ class FunctionSamples {
 return VirtualCallsiteTypeCounts[mapIRLocToProfileLoc(Loc)];
   }
 
+  /// At location \p Loc, add a type sample for the given \p Type with
+  /// \p Count. This function uses saturating arithmetic to clamp the result to
+  /// maximum uint64_t (the counter type) and returns counter_overflow to 
caller
+  /// if the actual result is larger than maximum uint64_t.
+  sampleprof_error addTypeSamplesAt(const LineLocation &Loc, FunctionId Type,
+uint64_t Count) {
+auto &TypeCounts = getTypeSamplesAt(Loc);
+bool Overflowed = false;
+TypeCounts[Type] = SaturatingMultiplyAdd(Count, /* Weight= */ (uint64_t)1,

snehasish wrote:

I don't think we should insert into the map if it overflowed. Can you check the 
overflow first and then insert?

https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits


@@ -344,6 +350,36 @@ void 
ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile &Obj,
 exitWithError("no executable segment found", FileName);
 }
 
+uint64_t ProfiledBinary::CanonicalizeNonTextAddress(uint64_t Address) {
+  uint64_t FileOffset = 0;
+  auto MMapIter = NonTextMMapEvents.lower_bound(Address);
+  if (MMapIter == NonTextMMapEvents.end())
+return Address; // No non-text mmap event found, return the address as is.
+
+  const auto &MMapEvent = MMapIter->second;
+
+  // If the address is within the non-text mmap event, calculates its file

snehasish wrote:

typo: s/calculates/calculate/

https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits


@@ -611,23 +635,11 @@ class PerfScriptReader : public PerfReaderBase {
   static SmallVector TempFileCleanups;
 
 protected:
-  // The parsed MMap event
-  struct MMapEvent {
-int64_t PID = 0;
-uint64_t Address = 0;
-uint64_t Size = 0;
-uint64_t Offset = 0;
-StringRef BinaryPath;
-  };
-
   // Check whether a given line is LBR sample
   static bool isLBRSample(StringRef Line);
   // Check whether a given line is MMAP event
   static bool isMMapEvent(StringRef Line);
-  // Parse a single line of a PERF_RECORD_MMAP event looking for a
-  // mapping between the binary name and its memory layout.
-  static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
-MMapEvent &MMap);
+

snehasish wrote:

nit: extra newline? The existing code doesn't have new lines between decls.

https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits


@@ -946,6 +978,14 @@ SampleContextFrameVector ProfiledBinary::symbolize(const 
InstructionPointer &IP,
   return CallStack;
 }
 
+StringRef ProfiledBinary::symbolizeDataAddress(uint64_t Address) {
+  DIGlobal DataDIGlobal = unwrapOrError(
+  Symbolizer->symbolizeData(SymbolizerPath.str(), {Address, 0}),

snehasish wrote:

Thanks for the detailed followup.

https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][ELF] Merge AgainstSymbol and AgainstSymbolWithTargetVA (PR #150798)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150798


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] dc6171d - Revert "[mlir][spirv] Fix UpdateVCEPass to deduce the correct set of capabili…"

2025-07-30 Thread via llvm-branch-commits

Author: Igor Wodiany
Date: 2025-07-30T17:06:21+01:00
New Revision: dc6171d58e26aba18bb367a73a2e8a7cbf0e1f22

URL: 
https://github.com/llvm/llvm-project/commit/dc6171d58e26aba18bb367a73a2e8a7cbf0e1f22
DIFF: 
https://github.com/llvm/llvm-project/commit/dc6171d58e26aba18bb367a73a2e8a7cbf0e1f22.diff

LOG: Revert "[mlir][spirv] Fix UpdateVCEPass to deduce the correct set of 
capabili…"

This reverts commit 3d4f1fee48689465b5026f75414247307db7d34d.

Added: 


Modified: 
mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp
mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir

Removed: 




diff  --git a/mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp 
b/mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp
index 9b1c84ee66156..6a9b951ca61d6 100644
--- a/mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp
+++ b/mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp
@@ -95,13 +95,6 @@ static LogicalResult checkAndUpdateCapabilityRequirements(
   return success();
 }
 
-static void addAllImpliedCapabilities(SetVector &caps) {
-  for (spirv::Capability cap : caps) {
-ArrayRef impliedCaps = 
getDirectImpliedCapabilities(cap);
-caps.insert_range(impliedCaps);
-  }
-}
-
 void UpdateVCEPass::runOnOperation() {
   spirv::ModuleOp module = getOperation();
 
@@ -175,8 +168,6 @@ void UpdateVCEPass::runOnOperation() {
 return WalkResult::interrupt();
 }
 
-addAllImpliedCapabilities(deducedCapabilities);
-
 return WalkResult::advance();
   });
 

diff  --git a/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir 
b/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir
index d657633665876..2b237665ffc4a 100644
--- a/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir
+++ b/mlir/test/Dialect/SPIRV/Transforms/vce-deduction.mlir
@@ -7,7 +7,7 @@
 // Test deducing minimal version.
 // spirv.IAdd is available from v1.0.
 
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
 spirv.module Logical GLSL450 attributes {
   spirv.target_env = #spirv.target_env<
 #spirv.vce, #spirv.resource_limits<>>
@@ -21,7 +21,7 @@ spirv.module Logical GLSL450 attributes {
 // Test deducing minimal version.
 // spirv.GroupNonUniformBallot is available since v1.3.
 
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
 spirv.module Logical GLSL450 attributes {
   spirv.target_env = #spirv.target_env<
 #spirv.vce, 
#spirv.resource_limits<>>
@@ -32,7 +32,7 @@ spirv.module Logical GLSL450 attributes {
   }
 }
 
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
 spirv.module Logical GLSL450 attributes {
   spirv.target_env = #spirv.target_env<#spirv.vce, 
#spirv.resource_limits<>>
 } {
@@ -48,7 +48,7 @@ spirv.module Logical GLSL450 attributes {
 
 // Test minimal capabilities.
 
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
 spirv.module Logical GLSL450 attributes {
   spirv.target_env = #spirv.target_env<
 #spirv.vce, #spirv.resource_limits<>>
@@ -61,10 +61,10 @@ spirv.module Logical GLSL450 attributes {
 
 // Test Physical Storage Buffers are deduced correctly.
 
-// CHECK: spirv.module PhysicalStorageBuffer64 GLSL450 requires 
#spirv.vce
+// CHECK: spirv.module PhysicalStorageBuffer64 GLSL450 requires 
#spirv.vce
 spirv.module PhysicalStorageBuffer64 GLSL450 attributes {
   spirv.target_env = #spirv.target_env<
-#spirv.vce, #spirv.resource_limits<>>
+#spirv.vce, #spirv.resource_limits<>>
 } {
   spirv.func @physical_ptr(%val : !spirv.ptr { 
spirv.decoration = #spirv.decoration }) "None" {
 spirv.Return
@@ -74,7 +74,7 @@ spirv.module PhysicalStorageBuffer64 GLSL450 attributes {
 // Test deducing implied capability.
 // AtomicStorage implies Shader.
 
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
 spirv.module Logical GLSL450 attributes {
   spirv.target_env = #spirv.target_env<
 #spirv.vce, #spirv.resource_limits<>>
@@ -95,7 +95,7 @@ spirv.module Logical GLSL450 attributes {
 // * GroupNonUniformArithmetic
 // * GroupNonUniformBallot
 
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
 spirv.module Logical GLSL450 attributes {
   spirv.target_env = #spirv.target_env<
 #spirv.vce, 
#spirv.resource_limits<>>
@@ -106,7 +106,7 @@ spirv.module Logical GLSL450 attributes {
   }
 }
 
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
 spirv.module Logical GLSL450 attributes {
   spirv.target_env = #spirv.target_env<
 #spirv.vce, #spirv.resource_limits<>>
@@ -120,7 +120,7 @@ spirv.module Logical GLSL450 attributes {
 // Test type required capabilities
 
 // Using 8-bit integers in non-interface storage class requires Int8.
-// CHECK: requires #spirv.vce
+// CHECK: requires #spirv.vce
 spirv.module Logical GLSL450 attributes {
   spirv.target_env = #spirv.target_env<
 #spirv.vce, #spirv.resource_limits<>>
@@ -132,7 +132,7 @@ spirv.module Logical GLSL450 attributes {
 }
 
 // Using 16-bit floats in non-interface storag

[llvm-branch-commits] [NFCI][ELF][Mips] Replace MipsMultiGotPage with new RE_MIPS_OSEC_LOCAL_PAGE (PR #150810)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150810


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][ELF][Mips] Replace MipsMultiGotPage with new RE_MIPS_OSEC_LOCAL_PAGE (PR #150810)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150810


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [NFC][ELF] Don't duplicate DynamicReloc constructor (PR #150811)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150811


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF] Don't duplicate DynamicReloc constructor (PR #150811)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150811


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][ELF] Store DynamicReloc Kind as two bools (PR #150812)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150812


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF] Replace DynamicReloc::Kind with the equivalent bool in APIs (PR #150813)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150813


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][ELF] Store DynamicReloc Kind as two bools (PR #150812)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150812


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][ELF] Replace DynamicReloc::Kind with the equivalent bool in APIs (PR #150813)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150813


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [NFCI][ELF] Introduce explicit Computed state for DynamicReloc (PR #150799)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150799

>From 1308e1aad30d7089f658832150854b1362c63f45 Mon Sep 17 00:00:00 2001
From: Jessica Clarke 
Date: Sat, 26 Jul 2025 22:05:06 +0100
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?=
 =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5

[skip ci]
---
 lld/ELF/Config.h| 2 ++
 lld/ELF/Driver.cpp  | 1 +
 lld/ELF/Relocations.cpp | 3 +--
 lld/ELF/Target.cpp  | 3 +--
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index d9639b06ca4bf..958e5caaf0dfa 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -701,6 +701,8 @@ struct Ctx : CommonLinkerContext {
   std::unique_ptr tar;
   // InputFile for linker created symbols with no source location.
   InputFile *internalFile = nullptr;
+  // Dummy Undefined for relocations without a symbol.
+  Undefined *dummySym = nullptr;
   // True if symbols can be exported (isExported) or preemptible.
   bool hasDynsym = false;
   // True if SHT_LLVM_SYMPART is used.
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 21d228eda6470..4dcf577ebcb16 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -3138,6 +3138,7 @@ template  void 
LinkerDriver::link(opt::InputArgList &args) {
 ctx.symtab->insert(arg->getValue())->traced = true;
 
   ctx.internalFile = createInternalFile(ctx, "");
+  ctx.dummySym = make(ctx.internalFile, "", STB_LOCAL, 0, 0);
 
   // Handle -u/--undefined before input files. If both a.a and b.so define foo,
   // -u foo a.a b.so will extract a.a.
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index bd22fe2f1aa25..e847e85b060fe 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1948,13 +1948,12 @@ void elf::postScanRelocations(Ctx &ctx) {
 
   GotSection *got = ctx.in.got.get();
   if (ctx.needsTlsLd.load(std::memory_order_relaxed) && got->addTlsIndex()) {
-static Undefined dummy(ctx.internalFile, "", STB_LOCAL, 0, 0);
 if (ctx.arg.shared)
   ctx.mainPart->relaDyn->addReloc(
   {ctx.target->tlsModuleIndexRel, got, got->getTlsIndexOff()});
 else
   got->addConstant({R_ADDEND, ctx.target->symbolicRel,
-got->getTlsIndexOff(), 1, &dummy});
+got->getTlsIndexOff(), 1, ctx.dummySym});
   }
 
   assert(ctx.symAux.size() == 1);
diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp
index ad7d57d30668d..4946484074d05 100644
--- a/lld/ELF/Target.cpp
+++ b/lld/ELF/Target.cpp
@@ -105,10 +105,9 @@ ErrorPlace elf::getErrorPlace(Ctx &ctx, const uint8_t 
*loc) {
 if (isecLoc <= loc && loc < isecLoc + isec->getSize()) {
   std::string objLoc = isec->getLocation(loc - isecLoc);
   // Return object file location and source file location.
-  Undefined dummy(ctx.internalFile, "", STB_LOCAL, 0, 0);
   ELFSyncStream msg(ctx, DiagLevel::None);
   if (isec->file)
-msg << isec->getSrcMsg(dummy, loc - isecLoc);
+msg << isec->getSrcMsg(*ctx.dummySym, loc - isecLoc);
   return {isec, objLoc + ": ", std::string(msg.str())};
 }
   }

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][ELF] Merge AddendOnly and AddendOnlyWithTargetVA (PR #150797)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150797


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][ELF] Merge AddendOnly and AddendOnlyWithTargetVA (PR #150797)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150797


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [ELF][Mips] Fix addend for preemptible static TLS (PR #150729)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150729

>From 32400cb0d5c16e16b6d0d259955ba060f561fefe Mon Sep 17 00:00:00 2001
From: Jessica Clarke 
Date: Sat, 26 Jul 2025 02:12:18 +0100
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5
---
 lld/ELF/SyntheticSections.cpp | 16 
 lld/ELF/SyntheticSections.h   |  9 +
 lld/test/ELF/mips-mgot.s  |  2 +-
 lld/test/ELF/mips-tls-64.s|  2 +-
 lld/test/ELF/mips-tls.s   |  2 +-
 5 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index efec41a737b62..0bb00c6d2bcff 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1065,9 +1065,8 @@ void MipsGotSection::build() {
   // for the TP-relative offset as we don't know how much other data will
   // be allocated before us in the static TLS block.
   if (s->isPreemptible || ctx.arg.shared)
-ctx.mainPart->relaDyn->addReloc(
-{ctx.target->tlsGotRel, this, offset,
- DynamicReloc::AgainstSymbolWithTargetVA, *s, 0, R_ABS});
+ctx.mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible(
+ctx.target->tlsGotRel, *this, offset, *s, ctx.target->symbolicRel);
 }
 for (std::pair &p : got.dynTlsSymbols) {
   Symbol *s = p.first;
@@ -1160,6 +1159,7 @@ void MipsGotSection::writeTo(uint8_t *buf) {
   // if we had to do this.
   writeUint(ctx, buf + ctx.arg.wordsize,
 (uint64_t)1 << (ctx.arg.wordsize * 8 - 1));
+  ctx.target->relocateAlloc(*this, buf);
   for (const FileGot &g : gots) {
 auto write = [&](size_t i, const Symbol *s, int64_t a) {
   uint64_t va = a;
@@ -1189,9 +1189,10 @@ void MipsGotSection::writeTo(uint8_t *buf) {
 write(p.second, p.first, 0);
 for (const std::pair &p : g.relocs)
   write(p.second, p.first, 0);
-for (const std::pair &p : g.tls)
-  write(p.second, p.first,
-p.first->isPreemptible || ctx.arg.shared ? 0 : -0x7000);
+for (const std::pair &p : g.tls) {
+  if (!p.first->isPreemptible && !ctx.arg.shared)
+write(p.second, p.first, -0x7000);
+}
 for (const std::pair &p : g.dynTlsSymbols) {
   if (p.first == nullptr && !ctx.arg.shared)
 write(p.second, nullptr, 1);
@@ -1653,8 +1654,7 @@ int64_t DynamicReloc::computeAddend(Ctx &ctx) const {
   case AgainstSymbol:
 assert(sym != nullptr);
 return addend;
-  case AddendOnlyWithTargetVA:
-  case AgainstSymbolWithTargetVA: {
+  case AddendOnlyWithTargetVA: {
 uint64_t ca = inputSec->getRelocTargetVA(
 ctx, Relocation{expr, type, 0, addend, sym}, getOffset());
 return ctx.arg.is64 ? ca : SignExtend64<32>(ca);
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 5f01513630597..7612915b5b1dc 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -429,11 +429,6 @@ class DynamicReloc {
 /// The resulting dynamic relocation references symbol #sym from the 
dynamic
 /// symbol table and uses #addend as the value of computeAddend(ctx).
 AgainstSymbol,
-/// The resulting dynamic relocation references symbol #sym from the 
dynamic
-/// symbol table and uses InputSection::getRelocTargetVA() + #addend for 
the
-/// final addend. It can be used for relocations that write the symbol VA 
as
-// the addend (e.g. R_MIPS_TLS_TPREL64) but still reference the symbol.
-AgainstSymbolWithTargetVA,
 /// This is used by the MIPS multi-GOT implementation. It relocates
 /// addresses of 64kb pages that lie inside the output section.
 MipsMultiGotPage,
@@ -460,9 +455,7 @@ class DynamicReloc {
 
   uint64_t getOffset() const;
   uint32_t getSymIndex(SymbolTableBaseSection *symTab) const;
-  bool needsDynSymIndex() const {
-return kind == AgainstSymbol || kind == AgainstSymbolWithTargetVA;
-  }
+  bool needsDynSymIndex() const { return kind == AgainstSymbol; }
 
   /// Computes the addend of the dynamic relocation. Note that this is not the
   /// same as the #addend member variable as it may also include the symbol
diff --git a/lld/test/ELF/mips-mgot.s b/lld/test/ELF/mips-mgot.s
index 6978b5d9623b4..67bd5e6619f12 100644
--- a/lld/test/ELF/mips-mgot.s
+++ b/lld/test/ELF/mips-mgot.s
@@ -23,7 +23,7 @@
 
 # CHECK:  Contents of section .got:
 # CHECK-NEXT:  7  8000 [[FOO0]] [[FOO2]]
-# CHECK-NEXT:  70010  0004 0001 0002
+# CHECK-NEXT:  70010   0001 0002
 # CHECK-NEXT:  70020 0003 0004 0005 0006
 # CHECK-NEXT:  70030    
 # CHECK-NEXT:  70040   
diff --git a/lld/test/ELF/mips-tls-64.s b/lld/test/ELF/mips-tls-64.s
index 3976b50274be4..8a00b93c77e2f 100644
--- a/ll

[llvm-branch-commits] [lld] [ELF][Mips] Fix addend for preemptible static TLS (PR #150729)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150729

>From 32400cb0d5c16e16b6d0d259955ba060f561fefe Mon Sep 17 00:00:00 2001
From: Jessica Clarke 
Date: Sat, 26 Jul 2025 02:12:18 +0100
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5
---
 lld/ELF/SyntheticSections.cpp | 16 
 lld/ELF/SyntheticSections.h   |  9 +
 lld/test/ELF/mips-mgot.s  |  2 +-
 lld/test/ELF/mips-tls-64.s|  2 +-
 lld/test/ELF/mips-tls.s   |  2 +-
 5 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index efec41a737b62..0bb00c6d2bcff 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1065,9 +1065,8 @@ void MipsGotSection::build() {
   // for the TP-relative offset as we don't know how much other data will
   // be allocated before us in the static TLS block.
   if (s->isPreemptible || ctx.arg.shared)
-ctx.mainPart->relaDyn->addReloc(
-{ctx.target->tlsGotRel, this, offset,
- DynamicReloc::AgainstSymbolWithTargetVA, *s, 0, R_ABS});
+ctx.mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible(
+ctx.target->tlsGotRel, *this, offset, *s, ctx.target->symbolicRel);
 }
 for (std::pair &p : got.dynTlsSymbols) {
   Symbol *s = p.first;
@@ -1160,6 +1159,7 @@ void MipsGotSection::writeTo(uint8_t *buf) {
   // if we had to do this.
   writeUint(ctx, buf + ctx.arg.wordsize,
 (uint64_t)1 << (ctx.arg.wordsize * 8 - 1));
+  ctx.target->relocateAlloc(*this, buf);
   for (const FileGot &g : gots) {
 auto write = [&](size_t i, const Symbol *s, int64_t a) {
   uint64_t va = a;
@@ -1189,9 +1189,10 @@ void MipsGotSection::writeTo(uint8_t *buf) {
 write(p.second, p.first, 0);
 for (const std::pair &p : g.relocs)
   write(p.second, p.first, 0);
-for (const std::pair &p : g.tls)
-  write(p.second, p.first,
-p.first->isPreemptible || ctx.arg.shared ? 0 : -0x7000);
+for (const std::pair &p : g.tls) {
+  if (!p.first->isPreemptible && !ctx.arg.shared)
+write(p.second, p.first, -0x7000);
+}
 for (const std::pair &p : g.dynTlsSymbols) {
   if (p.first == nullptr && !ctx.arg.shared)
 write(p.second, nullptr, 1);
@@ -1653,8 +1654,7 @@ int64_t DynamicReloc::computeAddend(Ctx &ctx) const {
   case AgainstSymbol:
 assert(sym != nullptr);
 return addend;
-  case AddendOnlyWithTargetVA:
-  case AgainstSymbolWithTargetVA: {
+  case AddendOnlyWithTargetVA: {
 uint64_t ca = inputSec->getRelocTargetVA(
 ctx, Relocation{expr, type, 0, addend, sym}, getOffset());
 return ctx.arg.is64 ? ca : SignExtend64<32>(ca);
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index 5f01513630597..7612915b5b1dc 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -429,11 +429,6 @@ class DynamicReloc {
 /// The resulting dynamic relocation references symbol #sym from the 
dynamic
 /// symbol table and uses #addend as the value of computeAddend(ctx).
 AgainstSymbol,
-/// The resulting dynamic relocation references symbol #sym from the 
dynamic
-/// symbol table and uses InputSection::getRelocTargetVA() + #addend for 
the
-/// final addend. It can be used for relocations that write the symbol VA 
as
-// the addend (e.g. R_MIPS_TLS_TPREL64) but still reference the symbol.
-AgainstSymbolWithTargetVA,
 /// This is used by the MIPS multi-GOT implementation. It relocates
 /// addresses of 64kb pages that lie inside the output section.
 MipsMultiGotPage,
@@ -460,9 +455,7 @@ class DynamicReloc {
 
   uint64_t getOffset() const;
   uint32_t getSymIndex(SymbolTableBaseSection *symTab) const;
-  bool needsDynSymIndex() const {
-return kind == AgainstSymbol || kind == AgainstSymbolWithTargetVA;
-  }
+  bool needsDynSymIndex() const { return kind == AgainstSymbol; }
 
   /// Computes the addend of the dynamic relocation. Note that this is not the
   /// same as the #addend member variable as it may also include the symbol
diff --git a/lld/test/ELF/mips-mgot.s b/lld/test/ELF/mips-mgot.s
index 6978b5d9623b4..67bd5e6619f12 100644
--- a/lld/test/ELF/mips-mgot.s
+++ b/lld/test/ELF/mips-mgot.s
@@ -23,7 +23,7 @@
 
 # CHECK:  Contents of section .got:
 # CHECK-NEXT:  7  8000 [[FOO0]] [[FOO2]]
-# CHECK-NEXT:  70010  0004 0001 0002
+# CHECK-NEXT:  70010   0001 0002
 # CHECK-NEXT:  70020 0003 0004 0005 0006
 # CHECK-NEXT:  70030    
 # CHECK-NEXT:  70040   
diff --git a/lld/test/ELF/mips-tls-64.s b/lld/test/ELF/mips-tls-64.s
index 3976b50274be4..8a00b93c77e2f 100644
--- a/ll

[llvm-branch-commits] [NFCI][ELF][Mips] Refactor MipsGotSection to avoid explicit writes (PR #150730)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150730


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFCI][ELF][Mips] Refactor MipsGotSection to avoid explicit writes (PR #150730)

2025-07-30 Thread Jessica Clarke via llvm-branch-commits

https://github.com/jrtc27 updated 
https://github.com/llvm/llvm-project/pull/150730


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] release/21.x [ObjCARC] Delete empty autoreleasepools with no autoreleases in them and remove ObjCARCAPElimPass (PR #150771)

2025-07-30 Thread via llvm-branch-commits

https://github.com/AZero13 closed 
https://github.com/llvm/llvm-project/pull/150771
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)

2025-07-30 Thread via llvm-branch-commits

https://github.com/easyonaadit edited 
https://github.com/llvm/llvm-project/pull/150170
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)

2025-07-30 Thread via llvm-branch-commits

https://github.com/easyonaadit updated 
https://github.com/llvm/llvm-project/pull/150170

>From e9752fc4a85d5156f821c07ecf2b5962843cec99 Mon Sep 17 00:00:00 2001
From: Aaditya 
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics

---
 a.out| Bin 0 -> 22264 bytes
 clang/include/clang/Basic/BuiltinsAMDGPU.def |  25 ++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp  |  58 +++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl  | 378 +++
 4 files changed, 461 insertions(+)
 create mode 100755 a.out

diff --git a/a.out b/a.out
new file mode 100755
index 
..2dbcd9ad6edc6908ee25aacddc07417f96ca46f2
GIT binary patch
literal 22264
zcmeHPdvIJ=c|Te|A`)AY^8k^A`V*w8rTI;cD87TFRne<};OQ#6ET60PXl@gLt
zZ$4Ni>d_+`Jyxh{@ObL2z)x^FKcNj5d2&MidOXL6-{qx0e5a)&LK{u_hu3)N<$6u(xGzsH>%`
zBbd(w+qFHqkLR>?`)%3+-DqLDB;67M}
zdtw>dyA_~3u22`DRjI{qCF0(1CDQow(wV~hxdwlqgzYfm-aWNUeQ5z-qkaN@xRf4H
zuNWSX%Dr`EJzLt(0%LI3J%}c1rTP|l+MC$F4zY{WX~R1&|0!a%>bp2H3|_zQ5{0t=
zm*ERnCSS)EV>xbU>VM-C522U=)xaAZfo|e6g7ixKlzOAGz!1Cq=&GKk@l`$R`o24$
zY#S{|)Xuw3&b7y#WE#lmcqVNS8*DBa_d0eWnRZgid*WbY
z@ycRj$?RYz+vB8CJ(+YOIa
zNFhIVz7_k9A-5x%DY%NUwl3SwLXQc&tGxj3Ao3mMPlbS#zczpyezccDa`D3SKc*0|f3%Vlyy
zZZ4S~)s}d=?pQ8!w>{#-?5LA>SsFMbo{pt{>oZsJy-B)p`;Z$-X7aF^k9YLr9DCZ#
zS(f3w8rSu}aXn>TsGX*^Y;oMps2(|U{Yo&dxvl{Es@|5CS~D+>`sj?uV99%qZKNOV
z4m{70kauIz8*bp^txIhk=-=G4d$+wI*x`={H!a}W+Nd`gMHQ!7o7&pb1C>^yQJ*2Z
zJ=h-X^i&bw(4p36vhMm^COY1-G1wMt<&$=ODz#^P{YWysJ{UCrKxt~+Go_9UTfEs!
zo1vrnaMXvJTUg@{`*3sXYW$cFmuEc_j{9)-+ayT)NKE>MOq2GJ_?rlraMtv@*uC@T
zTlC@ne9!rCf4(pK@CE&z4_Z1W+BaQD&DZ*H>7y~B!H1(^D}_cMesKv^%JSi87)znq
zhp#H3O11j%)jmAr!|Qx_*oULxE`^8>Zz!Q?pMCfmAHLg%`=6`BKAe3j31dF|asffJ
zKKu$FzSoDp-iIIX;cxKaAM@eY`0yh>ywQgr_2F;y;Sc+8@A={-9`oT>`Q&R%uX8wJ
zY4q)uiu4a!XcRNK8!cryu7wV4+)~MOEV(BcD>x}@kCQ6I^GJ2$dDp&6KPT+Fojq|p
zkz$2t+;(CyyU^9)%VsIM)`+Jh0?}B@Zll;5G8VesBFhEOVgC
z)LFfjBId&;ve!)Z-Hb_5uK&Ld$2F>6&;L?aZpmRjAA;
zP*2ax%4Dld`WY%GBGj)(lVLN78ot5HPiWR;KO==Q^B&BPddH34@ppKFLXFzI&x{`D
zPc9@s$yfX$>-ZO1PUBwnyROHFSYU*5dcfd9#XWR?3TYN-_qN`xgIIPy_!VzDC+JOK
z=0EX6b?Ku)XEd9$v2>Mo)#rjvZZ!YN`@a&zjAkyL&)fN|6OBJIn#zngsm}*9i9|k5
zu{_q=Y99^iiBbD5OqG7w6R}4Mi9|eiP(*7z(dB;8V<^qL6J*|SOgqR*M9RnoZ<~%2&SC$Xu%ncAKQ_Qr+Wshp2td%
zy(gZ_V?FS}fSL-9JA3U!%5m+xGr7A=_OS2|%W>*yEIpoBWFq)m!E7$M2a8dqg854Q
zXfR*$_$7~dOg@`TrOZY6d@#Rnd?b_l*XtI^C3shCDp*J-6Pet&E~!0V)S9mc3;B57
zj_pHvvA&39)#zPM;WCu64m{>D{;a1Ji;(axN8MeIvwQ>eTeYobn
z>A>pyrted#Y5&Z}Ppk|C9t;EmA65sl&wGvhNARk~fu-M)50>h{QXM#99w%zbbc|$;
zUe=c79fC0vk+uC{HZ}i=dndlGbY*|ruG%WKlwA%5~MzD1>=~~VC
z{Me1_*CRSs7-@-*7uG|%Wgs~^=BDv7O-p|oZ#UshCjELxOE#DJKs@SNR(ofAsNL#{
zb%r`)p^fc{k=TZ4=cbOBv(ecQZQbNVH;uHn$Kwe+Z8j#l)~T}W+e_Ia^
zQNKjOgJczCAzRxzHncmf8#jhF#yVQNHjZ?3wRc9_V~J2G)IQSMk#LZ>A-Zv$8c3$m
zlf2Oy80fXybuL!(Yn+AOvkT&UZY%YRx$=(2+YGLW?=WO=8J0-WM+`2!(63d&zh4Dk
ztqb&vInfu|T?Nln!T+iX{>>`*vsLhS(lLQrCF3V4&krD9ssHuAz$^8?EZC{k|5Cwp
z#*M2WI+*CLsvuUCgJVv9OqnBbH>ZNROw4f|6~v2Ku9jwZ6mUG1%h*{AGx!o&a3r5s
zL2sB6jOINJ!?MGQ{&;e{Fb)}1TaE%0`^c^{0y>6lyqHH9?Wm;-<2Jj&7bM8R(S-<(
z;H^W9N-jj&H#G2uF^lv(y^j2zXmrtX=pMfwx8$REyD>Vps0OYWhX}eHC_V=X7o?Mm
zM+i=im=-1w%~qWn3sU>=#D7oL>)&
z`ttmj@us{4Sek!=j1FZ5=mbbz88=EhD)T-vFC^Un@t=&-_<0C1(LZkHvt*uXP#gsr
zhO+F{Hed_%WnRm4V9Mpo`JEMPG9SsGaepY+caT_)QT{^p_Za*l75?c3A3-PaOL`2
z2Kyr#Yp}$mrZrbBdq)g0Ud9=WOkVc&E<%W>w^_1wnNLN`N
zI7=}IT;3CIvAkMWBC0V>6g&io9i07>l%CO4%8f6_Q!$1f7E@frnl}$O+SFZx9S>^
z{pPybZgAeW20wwy8Jv*x*?Dd+6U}qg?j8rr5
zUn6oI@ojYtyNrAocn9p@5-bS+U4P9hZ9nUu`1YNVi62J}{^Z=?uD-UJwr3-UZ)$_e
z+!fz%RO*+BVEwiCYdy-Jd~QD67MXbAr4=u%LydC-FTVBZ72jhtGP&aG(2E>=S-F?N
z)Vuz3ZZ2}T^EA?muPj&U{U^nqr~fmfzv<}}KQ{XJivFs|RjBW=E+@6t&z!rrpDgM-X?1RiQyBwS6gb%ZK}nuvG_qs
z9y~R6a^5?MBi*+Tp$KCqkfw?@B_>XuR#9+~)4ffoB#0o}+G#DY9mhj6^@k3^)sacS
zPZEwy{O}0?s(7f-$4H_*TM0|RfNmPqWoGv;v
zI=SLvT>HpmxEPr{+M2giQaB)YF0_7J$1}1(*qh}B;89fOG
z#;5t5o#oecKBJ#Unl^f#w^t(tMjrw(M(;(mWHb$iM)?$bF;d(Q)@OE5r=HYhEp7k<
zO9L9y=iG*9$x@TXPfu@xrQ&J$hmN=zpsuDxK2GYd?S%p>^lSnG3C-i?XvPh_8f|st
z&|Km26H}e=_U)dv$i#|Yu7%>eXX@9WX4bD+-Zafy?#aVAJi$Zv)Eok9Jj-`tc)()4
zYo5hwke459KL!)9?OrihyoPp9ulREy3=pmLH(qEm22V7gP8L6h0-oTvZPR3*oXJ{h
z_v~?iZ0{nISFC74J$Y&NFMvGvoW>6Rv=)`~Z&8mUhi2Ts+!<}$lV=*ULY8x#-+K#=
zYoEul{@O5NxMs}F1bYvlQe
zn?0iWFOp|hZ}x!Z|C&6z-?LfGf0{fSqS;~1KS=&{;d
z&CY0kGkG=_v&S`mBY8G$vkz;&ojjgwIDgH*jXZh|IDgGwN1m%QvsukwNgmG-oWJHT
zCC}l(>>&6vTP;M<7eUR^!O(_?$IG`qS1t3BdEprN4I_i_{D#rb_m#=>n?`>+ziDLP
ziTDFA97E`0OH=&>LF}03=GR4dLJYDzeb&euvnp#MrWooO)Nlsk1g4gZ(OGCv~&vlSPXKWtY{3U%ynQz
zV>oO0a~KnnKeNIjQMiPfZ!r0z#%Z26%xe+YGBC*Z@tW}+R&KL2_j*IGH?+&pEr#B1
zXu{AvhCX2ELxw(L=;MYyZRkH4`tOFmWa!H6CVxY(H?+&pEr#B1Xu{AvhCX2ELxys(
zeQExe^V_e<6tl0UER3?<;I~|-%zfuwMRe_v5D3((8PhJw{m1)qIp&KOT2YD(UjXmP
zNIMoX0v2cuzF405AY=3G6w_-k;tX|JgiGNT#3=uakq?`c_k+^Dw1<>q7{!nBwRn6o
zNIUq4I(}K{{+IjuevMzQ=)ty1j1Fi88j4mj2Qf!5;xm|5#=1
zk_sldh?@1*r-I#^`&(QGD@^`%Dzh-_Em|4I##+_<8dhobDxPwvsu!%BU^a!7Ct1*&x0k7
zyuAgp(G0U*xN7FDlOe^kp%>9EV1dlGospzXuS9e12zE(G2eD&>9n0vQB<5p5TLVeG
zvj*R8>WkQ0c6M*;vj_TzcG=vK!{|UVGs3lM57OtQ6B$DyNXdvkbqyu;a!oLr&0@EQ
zflctCx8p{CvahWboZe=_b_WM{+2hLIIxzG;mP(ticz0ywcPYzTqQ33DY>i

[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 3 (PR #151310)

2025-07-30 Thread via llvm-branch-commits

easyonaadit wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151310?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#150395** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/150395?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#150170** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/150170?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#151310** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151310?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151310?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#151309** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151309?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#150169** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/150169?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/151310
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 2 (PR #151309)

2025-07-30 Thread via llvm-branch-commits

easyonaadit wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151309?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#150395** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/150395?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#150170** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/150170?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#151310** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151310?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#151309** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151309?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151309?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#150169** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/150169?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/151309
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [AArch64] Keep floating-point conversion in SIMD (#147707) (PR #151317)

2025-07-30 Thread Paul Walker via llvm-branch-commits

paulwalker-arm wrote:

> @paulwalker-arm What do you think about merging this PR to the release branch?

As far as I know the PR is not fixing an existing bug or performance regression,
so it depends if we've passed the point of accepting new optimisations. 
@guy-david will need to argue the case for the PR's importance based on their 
need.  The PR itself is specific to AArch64 so there is no danger to other 
targets.  That said, the PR has just landed so it seems prudent to wait for 
wider buildbot testing to complete before pulling into a release.

https://github.com/llvm/llvm-project/pull/151317
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Use GetOmpDirectiveName to find directive source loca… (PR #150955)

2025-07-30 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah approved this pull request.


https://github.com/llvm/llvm-project/pull/150955
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [llvm] Write out raw profile bytes in little endian. (PR #150375)

2025-07-30 Thread Teresa Johnson via llvm-branch-commits

https://github.com/teresajohnson approved this pull request.

lgtm but suggest updating the title and/or description to note this is for 
MemProf.

https://github.com/llvm/llvm-project/pull/150375
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)

2025-07-30 Thread Jacek Caban via llvm-branch-commits

https://github.com/cjacek approved this pull request.


https://github.com/llvm/llvm-project/pull/151307
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy] [COFF] Ignore associative sections in executables (#151143) (PR #151336)

2025-07-30 Thread Jacek Caban via llvm-branch-commits

https://github.com/cjacek approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/151336
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [Analysis] Prevent revisiting block when searching for noreturn vars (#150582) (PR #151381)

2025-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/151381

Backport 330b40e11fd20e9a29b9c24de17e4ba23afeedc6

Requested by: @pawosm-arm

>From f0b310b07873d3a21ab8f9f0ca137e876fc0f4ec Mon Sep 17 00:00:00 2001
From: Serge Pavlov 
Date: Wed, 30 Jul 2025 20:40:07 +0700
Subject: [PATCH] [Analysis] Prevent revisiting block when searching for
 noreturn vars (#150582)

When searching for noreturn variable initializations, do not visit CFG
blocks that are already visited, it prevents hanging the analysis.

It must fix https://github.com/llvm/llvm-project/issues/150336.

(cherry picked from commit 330b40e11fd20e9a29b9c24de17e4ba23afeedc6)
---
 clang/lib/Sema/AnalysisBasedWarnings.cpp  |  4 
 clang/test/SemaCXX/noreturn-weverything.c | 15 +++
 2 files changed, 19 insertions(+)
 create mode 100644 clang/test/SemaCXX/noreturn-weverything.c

diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 5e75c64eb2b9a..85ac3c06ec2c2 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -503,8 +503,12 @@ static bool areAllValuesNoReturn(const VarDecl *VD, const 
CFGBlock &VarBlk,
 
   TransferFunctions TF(VD);
   BackwardDataflowWorklist Worklist(*AC.getCFG(), AC);
+  llvm::DenseSet Visited;
   Worklist.enqueueBlock(&VarBlk);
   while (const CFGBlock *B = Worklist.dequeue()) {
+if (Visited.contains(B))
+  continue;
+Visited.insert(B);
 // First check the current block.
 for (CFGBlock::const_reverse_iterator ri = B->rbegin(), re = B->rend();
  ri != re; ++ri) {
diff --git a/clang/test/SemaCXX/noreturn-weverything.c 
b/clang/test/SemaCXX/noreturn-weverything.c
new file mode 100644
index 0..92a587d395639
--- /dev/null
+++ b/clang/test/SemaCXX/noreturn-weverything.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -fsyntax-only %s -Weverything
+
+void free(void *);
+typedef void (*set_free_func)(void *);
+struct Method {
+  int nparams;
+  int *param;
+};
+void selelem_free_method(struct Method* method, void* data) {
+set_free_func free_func = 0;
+for (int i = 0; i < method->nparams; ++i)
+free(&method->param[i]);
+if (data && free_func)
+free_func(data);
+}

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/21.x: [LLD][X86] Match delayLoad thunk with MSVC (#149521) (PR #151307)

2025-07-30 Thread Martin Storsjö via llvm-branch-commits

https://github.com/mstorsjo approved this pull request.


https://github.com/llvm/llvm-project/pull/151307
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)

2025-07-30 Thread via llvm-branch-commits

https://github.com/easyonaadit updated 
https://github.com/llvm/llvm-project/pull/150170

>From bc06c18461bd36dc8c732d04bc6fab6ebaa4c0d5 Mon Sep 17 00:00:00 2001
From: Aaditya 
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def |  25 ++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp  |  58 +++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl  | 378 +++
 3 files changed, 461 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 878543566f0e3..b91b32457ff86 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
 BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
 BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
 
+//===--===//
+
+// Wave Reduction builtins.
+
+//===--===//
+
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
+
 
//===--===//
 // R600-NI only builtins.
 
//===--===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp 
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 7dccf82b1a7a3..c3d9ec5fc8309 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void 
CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
   Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
 }
 
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+llvm_unreachable("Unknown BuiltinID for wave reduction");
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+return Intrinsic::amdgcn_wave_reduce_add;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+return Intrinsic::amdgcn_wave_reduce_sub;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+return Intrinsic::amdgcn_wave_reduce_min;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+return Intrinsic::amdgcn_wave_reduce_umin;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+return Intrinsic::amdgcn_wave_reduce_max;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+return Intrinsic::amdgcn_wave_reduce_umax;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+return Intrinsic::amdgcn_wave_reduce_and;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+return Intrinsic::amdgcn_wave_reduce_or;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+return Intrinsic::amdgcn_wave_reduce_xor;
+  }
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
   llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u

[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)

2025-07-30 Thread via llvm-branch-commits

https://github.com/easyonaadit updated 
https://github.com/llvm/llvm-project/pull/150170

>From bc06c18461bd36dc8c732d04bc6fab6ebaa4c0d5 Mon Sep 17 00:00:00 2001
From: Aaditya 
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def |  25 ++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp  |  58 +++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl  | 378 +++
 3 files changed, 461 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 878543566f0e3..b91b32457ff86 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
 BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
 BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
 
+//===--===//
+
+// Wave Reduction builtins.
+
+//===--===//
+
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
+
 
//===--===//
 // R600-NI only builtins.
 
//===--===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp 
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 7dccf82b1a7a3..c3d9ec5fc8309 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void 
CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
   Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
 }
 
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+llvm_unreachable("Unknown BuiltinID for wave reduction");
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+return Intrinsic::amdgcn_wave_reduce_add;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+return Intrinsic::amdgcn_wave_reduce_sub;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+return Intrinsic::amdgcn_wave_reduce_min;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+return Intrinsic::amdgcn_wave_reduce_umin;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+return Intrinsic::amdgcn_wave_reduce_max;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+return Intrinsic::amdgcn_wave_reduce_umax;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+return Intrinsic::amdgcn_wave_reduce_and;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+return Intrinsic::amdgcn_wave_reduce_or;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+return Intrinsic::amdgcn_wave_reduce_xor;
+  }
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
   llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u

[llvm-branch-commits] [flang] [flang][OpenMP] Store directive information in OpenMPSectionConstruct (PR #150804)

2025-07-30 Thread Krzysztof Parzyszek via llvm-branch-commits

kparzysz wrote:

Ping

https://github.com/llvm/llvm-project/pull/150804
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [MemProf] Fix FileCheck prefix in the histogram test. (PR #150506)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish edited 
https://github.com/llvm/llvm-project/pull/150506
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Fix FileCheck prefix in the histogram test. (PR #150506)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/150506

>From 66b78c5ea8b8e016557edf98388c03e34c51b8ce Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Thu, 24 Jul 2025 06:25:00 +
Subject: [PATCH] Fix FileCheck prefix in the histogram test.

---
 .../memprof-padding-histogram.test| 152 +-
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test 
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
 CHECK-NEXT:   -
 
 CHECK:   Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT  Callstack:
+CHECK-NEXT  -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT  MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce

[llvm-branch-commits] [llvm] Fix FileCheck prefix in the histogram test. (PR #150506)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/150506

>From 66b78c5ea8b8e016557edf98388c03e34c51b8ce Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Thu, 24 Jul 2025 06:25:00 +0000
Subject: [PATCH] Fix FileCheck prefix in the histogram test.

---
 .../memprof-padding-histogram.test| 152 +-
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test 
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
 CHECK-NEXT:   -
 
 CHECK:   Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT  Callstack:
+CHECK-NEXT  -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT  MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce

[llvm-branch-commits] [compiler-rt] [llvm] Write out raw profile bytes in little endian. (PR #150375)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/150375

>From 733f040d1e113d71a328bacce5dc5abcc61a9258 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Thu, 24 Jul 2025 06:10:55 +0000
Subject: [PATCH 1/3] Write out raw profile bytes in little endian.

Instead of writing out in native endian, write out the raw profile bytes
in little endian. Also update the MIB data in little endian. Also clean
up some lint and unused includes in rawprofile.cpp.
---
 .../lib/memprof/memprof_rawprofile.cpp| 17 +
 llvm/lib/ProfileData/MemProfReader.cpp| 35 +--
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp 
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index f909d78f5f36a..fbcfee3d655eb 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -7,10 +7,7 @@
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_array_ref.h"
 #include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_linux.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
-#include "sanitizer_common/sanitizer_stackdepotbase.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
 #include "sanitizer_common/sanitizer_vector.h"
 
@@ -23,7 +20,16 @@ using ::llvm::memprof::encodeHistogramCount;
 
 namespace {
 template <class T> char *WriteBytes(const T &Pod, char *Buffer) {
-  *(T *)Buffer = Pod;
+  static_assert(is_trivially_copyable<T>::value, "T must be POD");
+  const uint8_t *Src = reinterpret_cast<const uint8_t *>(&Pod);
+  for (size_t I = 0; I < sizeof(T); ++I) {
+Buffer[I] = Src[I];
+  }
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  for (size_t i = 0; i < sizeof(T) / 2; ++i) {
+std::swap(buffer[i], buffer[sizeof(T) - 1 - i]);
+  }
+#endif
   return Buffer + sizeof(T);
 }
 
@@ -33,7 +39,6 @@ void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock 
*const &MIB,
   auto *StackIds = reinterpret_cast *>(Arg);
   StackIds->PushBack(Key);
 }
-} // namespace
 
 u64 SegmentSizeBytes(ArrayRef Modules) {
   u64 NumSegmentsToRecord = 0;
@@ -184,6 +189,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const 
Vector &StackIds,
   CHECK(ExpectedNumBytes >= static_cast(Ptr - Buffer) &&
 "Expected num bytes != actual bytes written");
 }
+} // namespace
 
 // Format
 // -- Header
@@ -288,5 +294,4 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, 
ArrayRef Modules,
 
   return TotalSizeBytes;
 }
-
 } // namespace __memprof
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp 
b/llvm/lib/ProfileData/MemProfReader.cpp
index 9db699712d6f3..3fc0dbfd8e69d 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -146,8 +146,39 @@ readMemInfoBlocksCommon(const char *Ptr, bool 
IsHistogramEncoded = false) {
 const uint64_t Id =
 endian::readNext(Ptr);
 
-MemInfoBlock MIB = *reinterpret_cast(Ptr);
-Ptr += sizeof(MemInfoBlock);
+MemInfoBlock MIB;
+#define READ_MIB_FIELD(FIELD)  
\
+  MIB.FIELD = endian::readNext(Ptr)
+
+READ_MIB_FIELD(AllocCount);
+READ_MIB_FIELD(TotalAccessCount);
+READ_MIB_FIELD(MinAccessCount);
+READ_MIB_FIELD(MaxAccessCount);
+READ_MIB_FIELD(TotalSize);
+READ_MIB_FIELD(MinSize);
+READ_MIB_FIELD(MaxSize);
+READ_MIB_FIELD(AllocTimestamp);
+READ_MIB_FIELD(DeallocTimestamp);
+READ_MIB_FIELD(TotalLifetime);
+READ_MIB_FIELD(MinLifetime);
+READ_MIB_FIELD(MaxLifetime);
+READ_MIB_FIELD(AllocCpuId);
+READ_MIB_FIELD(DeallocCpuId);
+READ_MIB_FIELD(NumMigratedCpu);
+READ_MIB_FIELD(NumLifetimeOverlaps);
+READ_MIB_FIELD(NumSameAllocCpu);
+READ_MIB_FIELD(NumSameDeallocCpu);
+READ_MIB_FIELD(DataTypeId);
+READ_MIB_FIELD(TotalAccessDensity);
+READ_MIB_FIELD(MinAccessDensity);
+READ_MIB_FIELD(MaxAccessDensity);
+READ_MIB_FIELD(TotalLifetimeAccessDensity);
+READ_MIB_FIELD(MinLifetimeAccessDensity);
+READ_MIB_FIELD(MaxLifetimeAccessDensity);
+READ_MIB_FIELD(AccessHistogramSize);
+READ_MIB_FIELD(AccessHistogram);
+#undef READ_MIB_FIELD
 
 if (MIB.AccessHistogramSize > 0) {
   // The in-memory representation uses uint64_t for histogram entries.

>From 4f3fc5e1eef921d5fa0058cf91a2c139ba7af249 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Wed, 30 Jul 2025 00:35:20 +0000
Subject: [PATCH 2/3] Address comment

---
 compiler-rt/lib/memprof/memprof_rawprofile.cpp | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp 
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index fbcfee3d655eb..bf04afa679c9c 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -22,13 +22,13 @@ namespace

[llvm-branch-commits] [compiler-rt] [llvm] Write out raw profile bytes in little endian. (PR #150375)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/150375

>From 733f040d1e113d71a328bacce5dc5abcc61a9258 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Thu, 24 Jul 2025 06:10:55 +0000
Subject: [PATCH 1/3] Write out raw profile bytes in little endian.

Instead of writing out in native endian, write out the raw profile bytes
in little endian. Also update the MIB data in little endian. Also clean
up some lint and unused includes in rawprofile.cpp.
---
 .../lib/memprof/memprof_rawprofile.cpp| 17 +
 llvm/lib/ProfileData/MemProfReader.cpp| 35 +--
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp 
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index f909d78f5f36a..fbcfee3d655eb 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -7,10 +7,7 @@
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_array_ref.h"
 #include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_linux.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
-#include "sanitizer_common/sanitizer_stackdepotbase.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
 #include "sanitizer_common/sanitizer_vector.h"
 
@@ -23,7 +20,16 @@ using ::llvm::memprof::encodeHistogramCount;
 
 namespace {
 template <class T> char *WriteBytes(const T &Pod, char *Buffer) {
-  *(T *)Buffer = Pod;
+  static_assert(is_trivially_copyable<T>::value, "T must be POD");
+  const uint8_t *Src = reinterpret_cast<const uint8_t *>(&Pod);
+  for (size_t I = 0; I < sizeof(T); ++I) {
+Buffer[I] = Src[I];
+  }
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  for (size_t i = 0; i < sizeof(T) / 2; ++i) {
+std::swap(buffer[i], buffer[sizeof(T) - 1 - i]);
+  }
+#endif
   return Buffer + sizeof(T);
 }
 
@@ -33,7 +39,6 @@ void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock 
*const &MIB,
   auto *StackIds = reinterpret_cast *>(Arg);
   StackIds->PushBack(Key);
 }
-} // namespace
 
 u64 SegmentSizeBytes(ArrayRef Modules) {
   u64 NumSegmentsToRecord = 0;
@@ -184,6 +189,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const 
Vector &StackIds,
   CHECK(ExpectedNumBytes >= static_cast(Ptr - Buffer) &&
 "Expected num bytes != actual bytes written");
 }
+} // namespace
 
 // Format
 // -- Header
@@ -288,5 +294,4 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, 
ArrayRef Modules,
 
   return TotalSizeBytes;
 }
-
 } // namespace __memprof
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp 
b/llvm/lib/ProfileData/MemProfReader.cpp
index 9db699712d6f3..3fc0dbfd8e69d 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -146,8 +146,39 @@ readMemInfoBlocksCommon(const char *Ptr, bool 
IsHistogramEncoded = false) {
 const uint64_t Id =
 endian::readNext(Ptr);
 
-MemInfoBlock MIB = *reinterpret_cast(Ptr);
-Ptr += sizeof(MemInfoBlock);
+MemInfoBlock MIB;
+#define READ_MIB_FIELD(FIELD)  
\
+  MIB.FIELD = endian::readNext(Ptr)
+
+READ_MIB_FIELD(AllocCount);
+READ_MIB_FIELD(TotalAccessCount);
+READ_MIB_FIELD(MinAccessCount);
+READ_MIB_FIELD(MaxAccessCount);
+READ_MIB_FIELD(TotalSize);
+READ_MIB_FIELD(MinSize);
+READ_MIB_FIELD(MaxSize);
+READ_MIB_FIELD(AllocTimestamp);
+READ_MIB_FIELD(DeallocTimestamp);
+READ_MIB_FIELD(TotalLifetime);
+READ_MIB_FIELD(MinLifetime);
+READ_MIB_FIELD(MaxLifetime);
+READ_MIB_FIELD(AllocCpuId);
+READ_MIB_FIELD(DeallocCpuId);
+READ_MIB_FIELD(NumMigratedCpu);
+READ_MIB_FIELD(NumLifetimeOverlaps);
+READ_MIB_FIELD(NumSameAllocCpu);
+READ_MIB_FIELD(NumSameDeallocCpu);
+READ_MIB_FIELD(DataTypeId);
+READ_MIB_FIELD(TotalAccessDensity);
+READ_MIB_FIELD(MinAccessDensity);
+READ_MIB_FIELD(MaxAccessDensity);
+READ_MIB_FIELD(TotalLifetimeAccessDensity);
+READ_MIB_FIELD(MinLifetimeAccessDensity);
+READ_MIB_FIELD(MaxLifetimeAccessDensity);
+READ_MIB_FIELD(AccessHistogramSize);
+READ_MIB_FIELD(AccessHistogram);
+#undef READ_MIB_FIELD
 
 if (MIB.AccessHistogramSize > 0) {
   // The in-memory representation uses uint64_t for histogram entries.

>From 4f3fc5e1eef921d5fa0058cf91a2c139ba7af249 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Wed, 30 Jul 2025 00:35:20 +0000
Subject: [PATCH 2/3] Address comment

---
 compiler-rt/lib/memprof/memprof_rawprofile.cpp | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp 
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index fbcfee3d655eb..bf04afa679c9c 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -22,13 +22,13 @@ namespace

[llvm-branch-commits] [compiler-rt] [llvm] [MemProf] Write out raw profile bytes in little endian. (PR #150375)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish edited 
https://github.com/llvm/llvm-project/pull/150375
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [llvm] [MemProf] Write out raw profile bytes in little endian. (PR #150375)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/150375

>From 090331353b22e4b97244f25166a0801ddecbef55 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Thu, 24 Jul 2025 06:10:55 +0000
Subject: [PATCH 1/3] Write out raw profile bytes in little endian.

Instead of writing out in native endian, write out the raw profile bytes
in little endian. Also update the MIB data in little endian. Also clean
up some lint and unused includes in rawprofile.cpp.
---
 .../lib/memprof/memprof_rawprofile.cpp| 17 +
 llvm/lib/ProfileData/MemProfReader.cpp| 35 +--
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp 
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index f909d78f5f36a..fbcfee3d655eb 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -7,10 +7,7 @@
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_array_ref.h"
 #include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_linux.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
-#include "sanitizer_common/sanitizer_stackdepotbase.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
 #include "sanitizer_common/sanitizer_vector.h"
 
@@ -23,7 +20,16 @@ using ::llvm::memprof::encodeHistogramCount;
 
 namespace {
 template <class T> char *WriteBytes(const T &Pod, char *Buffer) {
-  *(T *)Buffer = Pod;
+  static_assert(is_trivially_copyable<T>::value, "T must be POD");
+  const uint8_t *Src = reinterpret_cast<const uint8_t *>(&Pod);
+  for (size_t I = 0; I < sizeof(T); ++I) {
+Buffer[I] = Src[I];
+  }
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  for (size_t i = 0; i < sizeof(T) / 2; ++i) {
+std::swap(buffer[i], buffer[sizeof(T) - 1 - i]);
+  }
+#endif
   return Buffer + sizeof(T);
 }
 
@@ -33,7 +39,6 @@ void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock 
*const &MIB,
   auto *StackIds = reinterpret_cast *>(Arg);
   StackIds->PushBack(Key);
 }
-} // namespace
 
 u64 SegmentSizeBytes(ArrayRef Modules) {
   u64 NumSegmentsToRecord = 0;
@@ -184,6 +189,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const 
Vector &StackIds,
   CHECK(ExpectedNumBytes >= static_cast(Ptr - Buffer) &&
 "Expected num bytes != actual bytes written");
 }
+} // namespace
 
 // Format
 // -- Header
@@ -288,5 +294,4 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, 
ArrayRef Modules,
 
   return TotalSizeBytes;
 }
-
 } // namespace __memprof
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp 
b/llvm/lib/ProfileData/MemProfReader.cpp
index 9db699712d6f3..3fc0dbfd8e69d 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -146,8 +146,39 @@ readMemInfoBlocksCommon(const char *Ptr, bool 
IsHistogramEncoded = false) {
 const uint64_t Id =
 endian::readNext(Ptr);
 
-MemInfoBlock MIB = *reinterpret_cast(Ptr);
-Ptr += sizeof(MemInfoBlock);
+MemInfoBlock MIB;
+#define READ_MIB_FIELD(FIELD)  
\
+  MIB.FIELD = endian::readNext(Ptr)
+
+READ_MIB_FIELD(AllocCount);
+READ_MIB_FIELD(TotalAccessCount);
+READ_MIB_FIELD(MinAccessCount);
+READ_MIB_FIELD(MaxAccessCount);
+READ_MIB_FIELD(TotalSize);
+READ_MIB_FIELD(MinSize);
+READ_MIB_FIELD(MaxSize);
+READ_MIB_FIELD(AllocTimestamp);
+READ_MIB_FIELD(DeallocTimestamp);
+READ_MIB_FIELD(TotalLifetime);
+READ_MIB_FIELD(MinLifetime);
+READ_MIB_FIELD(MaxLifetime);
+READ_MIB_FIELD(AllocCpuId);
+READ_MIB_FIELD(DeallocCpuId);
+READ_MIB_FIELD(NumMigratedCpu);
+READ_MIB_FIELD(NumLifetimeOverlaps);
+READ_MIB_FIELD(NumSameAllocCpu);
+READ_MIB_FIELD(NumSameDeallocCpu);
+READ_MIB_FIELD(DataTypeId);
+READ_MIB_FIELD(TotalAccessDensity);
+READ_MIB_FIELD(MinAccessDensity);
+READ_MIB_FIELD(MaxAccessDensity);
+READ_MIB_FIELD(TotalLifetimeAccessDensity);
+READ_MIB_FIELD(MinLifetimeAccessDensity);
+READ_MIB_FIELD(MaxLifetimeAccessDensity);
+READ_MIB_FIELD(AccessHistogramSize);
+READ_MIB_FIELD(AccessHistogram);
+#undef READ_MIB_FIELD
 
 if (MIB.AccessHistogramSize > 0) {
   // The in-memory representation uses uint64_t for histogram entries.

>From 83336c8e84e24409acef3c5c988ad7c2e039452d Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Wed, 30 Jul 2025 00:35:20 +0000
Subject: [PATCH 2/3] Address comment

---
 compiler-rt/lib/memprof/memprof_rawprofile.cpp | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp 
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index fbcfee3d655eb..bf04afa679c9c 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -22,13 +22,13 @@ namespace

[llvm-branch-commits] [compiler-rt] [llvm] [MemProf] Write out raw profile bytes in little endian. (PR #150375)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/150375

>From 090331353b22e4b97244f25166a0801ddecbef55 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Thu, 24 Jul 2025 06:10:55 +0000
Subject: [PATCH 1/3] Write out raw profile bytes in little endian.

Instead of writing out in native endian, write out the raw profile bytes
in little endian. Also update the MIB data in little endian. Also clean
up some lint and unused includes in rawprofile.cpp.
---
 .../lib/memprof/memprof_rawprofile.cpp| 17 +
 llvm/lib/ProfileData/MemProfReader.cpp| 35 +--
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp 
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index f909d78f5f36a..fbcfee3d655eb 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -7,10 +7,7 @@
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_array_ref.h"
 #include "sanitizer_common/sanitizer_common.h"
-#include "sanitizer_common/sanitizer_linux.h"
-#include "sanitizer_common/sanitizer_procmaps.h"
 #include "sanitizer_common/sanitizer_stackdepot.h"
-#include "sanitizer_common/sanitizer_stackdepotbase.h"
 #include "sanitizer_common/sanitizer_stacktrace.h"
 #include "sanitizer_common/sanitizer_vector.h"
 
@@ -23,7 +20,16 @@ using ::llvm::memprof::encodeHistogramCount;
 
 namespace {
 template <class T> char *WriteBytes(const T &Pod, char *Buffer) {
-  *(T *)Buffer = Pod;
+  static_assert(is_trivially_copyable<T>::value, "T must be POD");
+  const uint8_t *Src = reinterpret_cast<const uint8_t *>(&Pod);
+  for (size_t I = 0; I < sizeof(T); ++I) {
+Buffer[I] = Src[I];
+  }
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  for (size_t i = 0; i < sizeof(T) / 2; ++i) {
+std::swap(buffer[i], buffer[sizeof(T) - 1 - i]);
+  }
+#endif
   return Buffer + sizeof(T);
 }
 
@@ -33,7 +39,6 @@ void RecordStackId(const uptr Key, UNUSED LockedMemInfoBlock 
*const &MIB,
   auto *StackIds = reinterpret_cast *>(Arg);
   StackIds->PushBack(Key);
 }
-} // namespace
 
 u64 SegmentSizeBytes(ArrayRef Modules) {
   u64 NumSegmentsToRecord = 0;
@@ -184,6 +189,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const 
Vector &StackIds,
   CHECK(ExpectedNumBytes >= static_cast(Ptr - Buffer) &&
 "Expected num bytes != actual bytes written");
 }
+} // namespace
 
 // Format
 // -- Header
@@ -288,5 +294,4 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, 
ArrayRef Modules,
 
   return TotalSizeBytes;
 }
-
 } // namespace __memprof
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp 
b/llvm/lib/ProfileData/MemProfReader.cpp
index 9db699712d6f3..3fc0dbfd8e69d 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -146,8 +146,39 @@ readMemInfoBlocksCommon(const char *Ptr, bool 
IsHistogramEncoded = false) {
 const uint64_t Id =
 endian::readNext(Ptr);
 
-MemInfoBlock MIB = *reinterpret_cast(Ptr);
-Ptr += sizeof(MemInfoBlock);
+MemInfoBlock MIB;
+#define READ_MIB_FIELD(FIELD)  
\
+  MIB.FIELD = endian::readNext(Ptr)
+
+READ_MIB_FIELD(AllocCount);
+READ_MIB_FIELD(TotalAccessCount);
+READ_MIB_FIELD(MinAccessCount);
+READ_MIB_FIELD(MaxAccessCount);
+READ_MIB_FIELD(TotalSize);
+READ_MIB_FIELD(MinSize);
+READ_MIB_FIELD(MaxSize);
+READ_MIB_FIELD(AllocTimestamp);
+READ_MIB_FIELD(DeallocTimestamp);
+READ_MIB_FIELD(TotalLifetime);
+READ_MIB_FIELD(MinLifetime);
+READ_MIB_FIELD(MaxLifetime);
+READ_MIB_FIELD(AllocCpuId);
+READ_MIB_FIELD(DeallocCpuId);
+READ_MIB_FIELD(NumMigratedCpu);
+READ_MIB_FIELD(NumLifetimeOverlaps);
+READ_MIB_FIELD(NumSameAllocCpu);
+READ_MIB_FIELD(NumSameDeallocCpu);
+READ_MIB_FIELD(DataTypeId);
+READ_MIB_FIELD(TotalAccessDensity);
+READ_MIB_FIELD(MinAccessDensity);
+READ_MIB_FIELD(MaxAccessDensity);
+READ_MIB_FIELD(TotalLifetimeAccessDensity);
+READ_MIB_FIELD(MinLifetimeAccessDensity);
+READ_MIB_FIELD(MaxLifetimeAccessDensity);
+READ_MIB_FIELD(AccessHistogramSize);
+READ_MIB_FIELD(AccessHistogram);
+#undef READ_MIB_FIELD
 
 if (MIB.AccessHistogramSize > 0) {
   // The in-memory representation uses uint64_t for histogram entries.

>From 83336c8e84e24409acef3c5c988ad7c2e039452d Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Wed, 30 Jul 2025 00:35:20 +0000
Subject: [PATCH 2/3] Address comment

---
 compiler-rt/lib/memprof/memprof_rawprofile.cpp | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp 
b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index fbcfee3d655eb..bf04afa679c9c 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -22,13 +22,13 @@ namespace

[llvm-branch-commits] [llvm] [MemProf] Fix FileCheck prefix in the histogram test. (PR #150506)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/150506

>From 9305bff0674d2c6cd7522af4647daef645ee2f85 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Thu, 24 Jul 2025 06:25:00 +0000
Subject: [PATCH] Fix FileCheck prefix in the histogram test.

---
 .../memprof-padding-histogram.test| 152 +-
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test 
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
 CHECK-NEXT:   -
 
 CHECK:   Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT  Callstack:
+CHECK-NEXT  -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT  MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce

[llvm-branch-commits] [llvm] [MemProf] Fix FileCheck prefix in the histogram test. (PR #150506)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/150506

>From 9305bff0674d2c6cd7522af4647daef645ee2f85 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Thu, 24 Jul 2025 06:25:00 +0000
Subject: [PATCH] Fix FileCheck prefix in the histogram test.

---
 .../memprof-padding-histogram.test| 152 +-
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test 
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
 CHECK-NEXT:   -
 
 CHECK:   Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT  Callstack:
+CHECK-NEXT  -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT  MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce

[llvm-branch-commits] [clang] release/21.x: [clang-format] Disable IntegerLiteralSeparator for C++ before c++14 (#151273) (PR #151362)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-format

Author: None (llvmbot)


Changes

Backport 5fc482cfc0fa70c98e14d64d83dffbf7da03c303

Requested by: @owenca

---
Full diff: https://github.com/llvm/llvm-project/pull/151362.diff


2 Files Affected:

- (modified) clang/lib/Format/IntegerLiteralSeparatorFixer.cpp (+7-4) 
- (modified) clang/unittests/Format/IntegerLiteralSeparatorTest.cpp (+3) 


```diff
diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp 
b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
index 87823ae32b113..aa752f5e3148a 100644
--- a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -45,15 +45,18 @@ std::pair
 IntegerLiteralSeparatorFixer::process(const Environment &Env,
   const FormatStyle &Style) {
   switch (Style.Language) {
-  case FormatStyle::LK_Cpp:
-  case FormatStyle::LK_ObjC:
-Separator = '\'';
-break;
   case FormatStyle::LK_CSharp:
   case FormatStyle::LK_Java:
   case FormatStyle::LK_JavaScript:
 Separator = '_';
 break;
+  case FormatStyle::LK_Cpp:
+  case FormatStyle::LK_ObjC:
+if (Style.Standard >= FormatStyle::LS_Cpp14) {
+  Separator = '\'';
+  break;
+}
+[[fallthrough]];
   default:
 return {};
   }
diff --git a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp 
b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
index b1e42e924e05c..67b9cc9037905 100644
--- a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
+++ b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
@@ -83,6 +83,9 @@ TEST_F(IntegerLiteralSeparatorTest, SingleQuoteAsSeparator) {
"d = 5678_km;\n"
"h = 0xDEF_u16;",
Style);
+
+  Style.Standard = FormatStyle::LS_Cpp11;
+  verifyFormat("ld = 1234L;", Style);
 }
 
 TEST_F(IntegerLiteralSeparatorTest, UnderscoreAsSeparator) {

```




https://github.com/llvm/llvm-project/pull/151362
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang-format] Disable IntegerLiteralSeparator for C++ before c++14 (#151273) (PR #151362)

2025-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/151362

Backport 5fc482cfc0fa70c98e14d64d83dffbf7da03c303

Requested by: @owenca

>From dd68262f577de21b19dda270e5b97e2327aa8186 Mon Sep 17 00:00:00 2001
From: Owen Pan 
Date: Wed, 30 Jul 2025 09:43:46 -0700
Subject: [PATCH] [clang-format] Disable IntegerLiteralSeparator for C++ before
 c++14 (#151273)

Fixes #151102

(cherry picked from commit 5fc482cfc0fa70c98e14d64d83dffbf7da03c303)
---
 clang/lib/Format/IntegerLiteralSeparatorFixer.cpp | 11 +++
 .../unittests/Format/IntegerLiteralSeparatorTest.cpp  |  3 +++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp 
b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
index 87823ae32b113..aa752f5e3148a 100644
--- a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -45,15 +45,18 @@ std::pair
 IntegerLiteralSeparatorFixer::process(const Environment &Env,
   const FormatStyle &Style) {
   switch (Style.Language) {
-  case FormatStyle::LK_Cpp:
-  case FormatStyle::LK_ObjC:
-Separator = '\'';
-break;
   case FormatStyle::LK_CSharp:
   case FormatStyle::LK_Java:
   case FormatStyle::LK_JavaScript:
 Separator = '_';
 break;
+  case FormatStyle::LK_Cpp:
+  case FormatStyle::LK_ObjC:
+if (Style.Standard >= FormatStyle::LS_Cpp14) {
+  Separator = '\'';
+  break;
+}
+[[fallthrough]];
   default:
 return {};
   }
diff --git a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp 
b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
index b1e42e924e05c..67b9cc9037905 100644
--- a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
+++ b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
@@ -83,6 +83,9 @@ TEST_F(IntegerLiteralSeparatorTest, SingleQuoteAsSeparator) {
"d = 5678_km;\n"
"h = 0xDEF_u16;",
Style);
+
+  Style.Standard = FormatStyle::LS_Cpp11;
+  verifyFormat("ld = 1234L;", Style);
 }
 
 TEST_F(IntegerLiteralSeparatorTest, UnderscoreAsSeparator) {

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang-format] Disable IntegerLiteralSeparator for C++ before c++14 (#151273) (PR #151362)

2025-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/151362
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang-format] Disable IntegerLiteralSeparator for C++ before c++14 (#151273) (PR #151362)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:

@HazardyKnusperkeks What do you think about merging this PR to the release 
branch?

https://github.com/llvm/llvm-project/pull/151362
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libcxxabi] [llvm] release/21.x: [libc++][hardening] Introduce assertion semantics. (#149459) (PR #151095)

2025-07-30 Thread Konstantin Varlamov via llvm-branch-commits

var-const wrote:

@tru Friendly ping. :) The CI job appears stuck, but IIUC it actually
succeeded and is merely failing to report itself as done (@ldionne, please
correct me if I'm wrong here).

https://github.com/llvm/llvm-project/pull/151095
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor atanhf implementation to header-only in src/__support/math folder. (PR #151399)

2025-07-30 Thread Muhammad Bassiouni via llvm-branch-commits

https://github.com/bassiounix updated 
https://github.com/llvm/llvm-project/pull/151399

>From 2fd12f451dbc98bc078fc3d86e71227e66950e3d Mon Sep 17 00:00:00 2001
From: bassiounix 
Date: Thu, 31 Jul 2025 00:41:13 +0300
Subject: [PATCH] [libc][math] Refactor atanhf implementation to header-only in
 src/__support/math folder.

---
 libc/shared/math.h|  1 +
 libc/shared/math/atanhf.h | 23 ++
 libc/src/__support/math/CMakeLists.txt| 11 +++
 libc/src/__support/math/atanhf.h  | 76 +++
 libc/src/math/generic/CMakeLists.txt  |  5 +-
 libc/src/math/generic/atanhf.cpp  | 56 +-
 libc/test/shared/CMakeLists.txt   |  1 +
 libc/test/shared/shared_math_test.cpp |  1 +
 .../llvm-project-overlay/libc/BUILD.bazel | 20 +++--
 9 files changed, 129 insertions(+), 65 deletions(-)
 create mode 100644 libc/shared/math/atanhf.h
 create mode 100644 libc/src/__support/math/atanhf.h

diff --git a/libc/shared/math.h b/libc/shared/math.h
index 6cb583c08dedd..ddf219ece8ff1 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -28,6 +28,7 @@
 #include "math/atan2f128.h"
 #include "math/atanf.h"
 #include "math/atanf16.h"
+#include "math/atanhf.h"
 #include "math/erff.h"
 #include "math/exp.h"
 #include "math/exp10.h"
diff --git a/libc/shared/math/atanhf.h b/libc/shared/math/atanhf.h
new file mode 100644
index 0..763fb3e00a659
--- /dev/null
+++ b/libc/shared/math/atanhf.h
@@ -0,0 +1,23 @@
+//===-- Shared atanhf function --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATANHF_H
+#define LLVM_LIBC_SHARED_MATH_ATANHF_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atanhf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atanhf;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ATANHF_H
diff --git a/libc/src/__support/math/CMakeLists.txt 
b/libc/src/__support/math/CMakeLists.txt
index caafdc2cbf1d6..500dd9de2c555 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -275,6 +275,17 @@ add_header_library(
 libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  atanhf
+  HDRS
+atanhf.h
+  DEPENDS
+.acoshf_utils
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.fenv_impl
+libc.src.__support.macros.optimization
+)
+
 add_header_library(
   asinf
   HDRS
diff --git a/libc/src/__support/math/atanhf.h b/libc/src/__support/math/atanhf.h
new file mode 100644
index 0..b3ee5bbb4d408
--- /dev/null
+++ b/libc/src/__support/math/atanhf.h
@@ -0,0 +1,76 @@
+//===-- Implementation header for atanhf *- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF_H
+
+#include "acoshf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE static constexpr float atanhf(float x) {
+  using namespace acoshf_internal;
+  using FPBits = typename fputil::FPBits;
+
+  FPBits xbits(x);
+  Sign sign = xbits.sign();
+  uint32_t x_abs = xbits.abs().uintval();
+
+  // |x| >= 1.0
+  if (LIBC_UNLIKELY(x_abs >= 0x3F80'U)) {
+if (xbits.is_nan()) {
+  if (xbits.is_signaling_nan()) {
+fputil::raise_except_if_required(FE_INVALID);
+return FPBits::quiet_nan().get_val();
+  }
+  return x;
+}
+// |x| == 1.0
+if (x_abs == 0x3F80'U) {
+  fputil::set_errno_if_required(ERANGE);
+  fputil::raise_except_if_required(FE_DIVBYZERO);
+  return FPBits::inf(sign).get_val();
+} else {
+  fputil::set_errno_if_required(EDOM);
+  fputil::raise_except_if_required(FE_INVALID);
+  return FPBits::quiet_nan().get_val();
+}
+  }
+
+  // |x| < ~0.10
+  if (LIBC_UNLIKELY(x_abs <= 0x3dcc'U)) {
+// |x| <= 2^-26
+if (LIBC_UNLIKELY(x_abs <= 0x3280'U)) {
+  return static_cast(LIBC_UNLIKELY(x_abs == 0)
+? x
+: (x + 0x1.5p-2 * x * x * x));
+}
+
+double xdbl = x;
+double x2 

[llvm-branch-commits] [llvm] [MemProf] Fix FileCheck prefix in the histogram test. (PR #150506)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/150506

>From 385e2e93795c3520760c7592a45f256aaad0694b Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Thu, 24 Jul 2025 06:25:00 +0000
Subject: [PATCH] Fix FileCheck prefix in the histogram test.

---
 .../memprof-padding-histogram.test| 152 +-
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test 
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
 CHECK-NEXT:   -
 
 CHECK:   Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT  Callstack:
+CHECK-NEXT  -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT  MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce

[llvm-branch-commits] [llvm] [MemProf] Fix FileCheck prefix in the histogram test. (PR #150506)

2025-07-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/150506

>From 385e2e93795c3520760c7592a45f256aaad0694b Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Thu, 24 Jul 2025 06:25:00 +0000
Subject: [PATCH] Fix FileCheck prefix in the histogram test.

---
 .../memprof-padding-histogram.test| 152 +-
 1 file changed, 76 insertions(+), 76 deletions(-)

diff --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test 
b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
index 79521f3aceb6d..2d0346e7cb259 100644
--- a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -21,79 +21,79 @@ CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}}
 CHECK-NEXT:   -
 
 CHECK:   Records:
-CHEC-NEXTFunctionGUID: {{[0-9]+}}
-CHEC-NEXTAllocSites:
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 3
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 5
-CHEC-NEXTMinAccessCount: 5
-CHEC-NEXTMaxAccessCount: 5
-CHEC-NEXTTotalSize: 24
-CHEC-NEXTMinSize: 24
-CHEC-NEXTMaxSize: 24
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 20
-CHEC-NEXTMinAccessDensity: 20
-CHEC-NEXTMaxAccessDensity: 20
-CHEC-NEXTTotalLifetimeAccessDensity: 2
-CHEC-NEXTMinLifetimeAccessDensity: 2
-CHEC-NEXTMaxLifetimeAccessDensity: 2
-CHEC-NEXTAccessHistogramSize: 3
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -1 -2
-CHEC-NEXT-
-CHEC-NEXT  Callstack:
-CHEC-NEXT  -
-CHEC-NEXTFunction: {{[0-9]+}}
-CHEC-NEXTSymbolName: main
-CHEC-NEXTLineOffset: 10
-CHEC-NEXTColumn: 10
-CHEC-NEXTInline: 0
-CHEC-NEXT  MemInfoBlock:
-CHEC-NEXTAllocCount: 1
-CHEC-NEXTTotalAccessCount: 4
-CHEC-NEXTMinAccessCount: 4
-CHEC-NEXTMaxAccessCount: 4
-CHEC-NEXTTotalSize: 48
-CHEC-NEXTMinSize: 48
-CHEC-NEXTMaxSize: 48
-CHEC-NEXTAllocTimestamp: {{[0-9]+}}
-CHEC-NEXTDeallocTimestamp: {{[0-9]+}}
-CHEC-NEXTTotalLifetime: 0
-CHEC-NEXTMinLifetime: 0
-CHEC-NEXTMaxLifetime: 0
-CHEC-NEXTAllocCpuId: 11
-CHEC-NEXTDeallocCpuId: 11
-CHEC-NEXTNumMigratedCpu: 0
-CHEC-NEXTNumLifetimeOverlaps: 0
-CHEC-NEXTNumSameAllocCpu: 0
-CHEC-NEXTNumSameDeallocCpu: 0
-CHEC-NEXTDataTypeId: 0
-CHEC-NEXTTotalAccessDensity: 8
-CHEC-NEXTMinAccessDensity: 8
-CHEC-NEXTMaxAccessDensity: 8
-CHEC-NEXTTotalLifetimeAccessDensity: 8000
-CHEC-NEXTMinLifetimeAccessDensity: 8000
-CHEC-NEXTMaxLifetimeAccessDensity: 8000
-CHEC-NEXTAccessHistogramSize: 6
-CHEC-NEXTAccessHistogram: {{[0-9]+}}
-CHEC-NEXTAccessHistogramValues: -2 -0 -0 -0 -1 -1
+CHECK-NEXTFunctionGUID: {{[0-9]+}}
+CHECK-NEXTAllocSites:
+CHECK-NEXT-
+CHECK-NEXT  Callstack:
+CHECK-NEXT  -
+CHECK-NEXTFunction: {{[0-9]+}}
+CHECK-NEXTSymbolName: main
+CHECK-NEXTLineOffset: 3
+CHECK-NEXTColumn: 10
+CHECK-NEXTInline: 0
+CHECK-NEXT  MemInfoBlock:
+CHECK-NEXTAllocCount: 1
+CHECK-NEXTTotalAccessCount: 5
+CHECK-NEXTMinAccessCount: 5
+CHECK-NEXTMaxAccessCount: 5
+CHECK-NEXTTotalSize: 24
+CHECK-NEXTMinSize: 24
+CHECK-NEXTMaxSize: 24
+CHECK-NEXTAllocTimestamp: {{[0-9]+}}
+CHECK-NEXTDeallocTimestamp: {{[0-9]+}}
+CHECK-NEXTTotalLifetime: 0
+CHECK-NEXTMinLifetime: 0
+CHECK-NEXTMaxLifetime: 0
+CHECK-NEXTAllocCpuId: 11
+CHECK-NEXTDeallocCpuId: 11
+CHECK-NEXTNumMigratedCpu: 0
+CHECK-NEXTNumLifetimeOverlaps: 0
+CHECK-NEXTNumSameAllocCpu: 0
+CHECK-NEXTNumSameDeallocCpu: 0
+CHECK-NEXTDataTypeId: 0
+CHECK-NEXTTotalAccessDensity: 20
+CHECK-NEXTMinAccessDensity: 20
+CHECK-NEXTMaxAccessDensity: 20
+CHECK-NEXTTotalLifetimeAccessDensity: 2
+CHECK-NEXTMinLifetimeAccessDensity: 2
+CHECK-NEXTMaxLifetimeAccessDensity: 2
+CHECK-NEXTAccessHistogramSize: 3
+CHECK-NEXTAccessHistogram: {{[0-9]+}}
+CHECK-NEXTAcce

[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)

2025-07-30 Thread Mingming Liu via llvm-branch-commits


@@ -344,6 +350,36 @@ void 
ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile &Obj,
 exitWithError("no executable segment found", FileName);
 }
 
+uint64_t ProfiledBinary::CanonicalizeNonTextAddress(uint64_t Address) {
+  uint64_t FileOffset = 0;
+  auto MMapIter = NonTextMMapEvents.lower_bound(Address);
+  if (MMapIter == NonTextMMapEvents.end())
+return Address; // No non-text mmap event found, return the address as is.
+
+  const auto &MMapEvent = MMapIter->second;
+
+  // If the address is within the non-text mmap event, calculates its file

mingmingl-llvm wrote:

done.

https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)

2025-07-30 Thread Mingming Liu via llvm-branch-commits


@@ -1027,6 +1027,20 @@ class FunctionSamples {
 return VirtualCallsiteTypeCounts[mapIRLocToProfileLoc(Loc)];
   }
 
+  /// At location \p Loc, add a type sample for the given \p Type with
+  /// \p Count. This function uses saturating arithmetic to clamp the result to
+  /// maximum uint64_t (the counter type) and returns counter_overflow to 
caller
+  /// if the actual result is larger than maximum uint64_t.
+  sampleprof_error addTypeSamplesAt(const LineLocation &Loc, FunctionId Type,
+uint64_t Count) {
+auto &TypeCounts = getTypeSamplesAt(Loc);
+bool Overflowed = false;
+TypeCounts[Type] = SaturatingMultiplyAdd(Count, /* Weight= */ (uint64_t)1,

mingmingl-llvm wrote:

As clarified offline, `SaturatingMultiplyAdd` clamps the result if an
overflow happens, and we store the value it returns. From this perspective,
`counter_overflow` is more of an informative warning than a real error: the
count saturates at the maximum instead of wrapping around to a different
value.

I updated the comment to make this more explicit, and will probably prepare a 
separate change around the warning handling (in SampleProf or InstrProf).

https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-profgen] Extend llvm-profgen to generate vtable profiles with data access events. (PR #148013)

2025-07-30 Thread Mingming Liu via llvm-branch-commits


@@ -611,23 +635,11 @@ class PerfScriptReader : public PerfReaderBase {
   static SmallVector TempFileCleanups;
 
 protected:
-  // The parsed MMap event
-  struct MMapEvent {
-int64_t PID = 0;
-uint64_t Address = 0;
-uint64_t Size = 0;
-uint64_t Offset = 0;
-StringRef BinaryPath;
-  };
-
   // Check whether a given line is LBR sample
   static bool isLBRSample(StringRef Line);
   // Check whether a given line is MMAP event
   static bool isMMapEvent(StringRef Line);
-  // Parse a single line of a PERF_RECORD_MMAP event looking for a
-  // mapping between the binary name and its memory layout.
-  static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
-MMapEvent &MMap);
+

mingmingl-llvm wrote:

done.

https://github.com/llvm/llvm-project/pull/148013
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm][AsmPrinter] Emit call graph section (PR #87576)

2025-07-30 Thread Prabhu Rajasekaran via llvm-branch-commits

https://github.com/Prabhuk updated 
https://github.com/llvm/llvm-project/pull/87576

>From 6b67376bd5e1f21606017c83cc67f2186ba36a33 Mon Sep 17 00:00:00 2001
From: Necip Fazil Yildiran 
Date: Thu, 13 Mar 2025 01:41:04 +0000
Subject: [PATCH 1/6] Updated the test as reviewers suggested.

Created using spr 1.3.6-beta.1
---
 llvm/test/CodeGen/X86/call-graph-section.ll | 66 +++
 llvm/test/CodeGen/call-graph-section.ll | 73 -
 2 files changed, 66 insertions(+), 73 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/call-graph-section.ll
 delete mode 100644 llvm/test/CodeGen/call-graph-section.ll

diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll 
b/llvm/test/CodeGen/X86/call-graph-section.ll
new file mode 100644
index 0..a77a2b8051ed3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/call-graph-section.ll
@@ -0,0 +1,66 @@
+;; Tests that we store the type identifiers in .callgraph section of the 
binary.
+
+; RUN: llc --call-graph-section -filetype=obj -o - < %s | \
+; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @foo() #0 !type !4 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @bar(i8 signext %a) #0 !type !5 {
+entry:
+  %a.addr = alloca i8, align 1
+  store i8 %a, ptr %a.addr, align 1
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local ptr @baz(ptr %a) #0 !type !6 {
+entry:
+  %a.addr = alloca ptr, align 8
+  store ptr %a, ptr %a.addr, align 8
+  ret ptr null
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @main() #0 !type !7 {
+entry:
+  %retval = alloca i32, align 4
+  %fp_foo = alloca ptr, align 8
+  %a = alloca i8, align 1
+  %fp_bar = alloca ptr, align 8
+  %fp_baz = alloca ptr, align 8
+  store i32 0, ptr %retval, align 4
+  store ptr @foo, ptr %fp_foo, align 8
+  %0 = load ptr, ptr %fp_foo, align 8
+  call void (...) %0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
+  store ptr @bar, ptr %fp_bar, align 8
+  %1 = load ptr, ptr %fp_bar, align 8
+  %2 = load i8, ptr %a, align 1
+  %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata 
!"_ZTSFicE.generalized") ]
+  store ptr @baz, ptr %fp_baz, align 8
+  %3 = load ptr, ptr %fp_baz, align 8
+  %call1 = call ptr %3(ptr %a) [ "callee_type"(metadata 
!"_ZTSFPvS_E.generalized") ]
+  call void @foo() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
+  %4 = load i8, ptr %a, align 1
+  %call2 = call i32 @bar(i8 signext %4) [ "callee_type"(metadata 
!"_ZTSFicE.generalized") ]
+  %call3 = call ptr @baz(ptr %a) [ "callee_type"(metadata 
!"_ZTSFPvS_E.generalized") ]
+  ret void
+}
+
+;; Check that the numeric type id (md5 hash) for the below type ids are emitted
+;; to the callgraph section.
+
+; CHECK: Hex dump of section '.callgraph':
+
+; CHECK-DAG: 2444f731 f5eecb3e
+!4 = !{i64 0, !"_ZTSFvE.generalized"}
+; CHECK-DAG: 5486bc59 814b8e30
+!5 = !{i64 0, !"_ZTSFicE.generalized"}
+; CHECK-DAG: 7ade6814 f897fd77
+!6 = !{i64 0, !"_ZTSFPvS_E.generalized"}
+; CHECK-DAG: caaf769a 600968fa
+!7 = !{i64 0, !"_ZTSFiE.generalized"}
diff --git a/llvm/test/CodeGen/call-graph-section.ll 
b/llvm/test/CodeGen/call-graph-section.ll
deleted file mode 100644
index bb158d11e82c9..0
--- a/llvm/test/CodeGen/call-graph-section.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; Tests that we store the type identifiers in .callgraph section of the binary.
-
-; RUN: llc --call-graph-section -filetype=obj -o - < %s | \
-; RUN: llvm-readelf -x .callgraph - | FileCheck %s
-
-target triple = "x86_64-unknown-linux-gnu"
-
-define dso_local void @foo() #0 !type !4 {
-entry:
-  ret void
-}
-
-define dso_local i32 @bar(i8 signext %a) #0 !type !5 {
-entry:
-  %a.addr = alloca i8, align 1
-  store i8 %a, i8* %a.addr, align 1
-  ret i32 0
-}
-
-define dso_local i32* @baz(i8* %a) #0 !type !6 {
-entry:
-  %a.addr = alloca i8*, align 8
-  store i8* %a, i8** %a.addr, align 8
-  ret i32* null
-}
-
-define dso_local i32 @main() #0 !type !7 {
-entry:
-  %retval = alloca i32, align 4
-  %fp_foo = alloca void (...)*, align 8
-  %a = alloca i8, align 1
-  %fp_bar = alloca i32 (i8)*, align 8
-  %fp_baz = alloca i32* (i8*)*, align 8
-  store i32 0, i32* %retval, align 4
-  store void (...)* bitcast (void ()* @foo to void (...)*), void (...)** 
%fp_foo, align 8
-  %0 = load void (...)*, void (...)** %fp_foo, align 8
-  call void (...) %0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
-  store i32 (i8)* @bar, i32 (i8)** %fp_bar, align 8
-  %1 = load i32 (i8)*, i32 (i8)** %fp_bar, align 8
-  %2 = load i8, i8* %a, align 1
-  %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata 
!"_ZTSFicE.generalized") ]
-  store i32* (i8*)* @baz, i32* (i8*)** %fp_baz, align 8
-  %3 = load i32* (i8*)*, i32* (i8*)** %fp_baz, align 8
-  %call1 = call i32* %3(i8* %a) [ "callee_type"(metadata 
!"_ZTSFPvS_E.generalized") ]
-  call void @foo() [ "callee_type"(meta

[llvm-branch-commits] [clang] [clang] Introduce CallGraphSection option (PR #117037)

2025-07-30 Thread Prabhu Rajasekaran via llvm-branch-commits

https://github.com/Prabhuk updated 
https://github.com/llvm/llvm-project/pull/117037

>From 6a12be2c5b60a95a06875b0b2c4f14228d1fa882 Mon Sep 17 00:00:00 2001
From: prabhukr 
Date: Wed, 12 Mar 2025 23:30:01 +0000
Subject: [PATCH 1/2] Fix EOF newlines.

Created using spr 1.3.6-beta.1
---
 clang/test/Driver/call-graph-section.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Driver/call-graph-section.c 
b/clang/test/Driver/call-graph-section.c
index 108446729d857..5832aa6754137 100644
--- a/clang/test/Driver/call-graph-section.c
+++ b/clang/test/Driver/call-graph-section.c
@@ -2,4 +2,4 @@
 // RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | 
FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s
 
 // CALL-GRAPH-SECTION: "-fcall-graph-section"
-// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section"
\ No newline at end of file
+// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section"

>From c67f714eaab9a7f1e4d2d76da28641b05710231d Mon Sep 17 00:00:00 2001
From: prabhukr 
Date: Mon, 21 Jul 2025 23:53:52 +0000
Subject: [PATCH 2/2] Fix review comment on test file.

Created using spr 1.3.6-beta.1
---
 clang/test/Driver/call-graph-section.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Driver/call-graph-section.c 
b/clang/test/Driver/call-graph-section.c
index 5832aa6754137..563f36de4119e 100644
--- a/clang/test/Driver/call-graph-section.c
+++ b/clang/test/Driver/call-graph-section.c
@@ -1,5 +1,5 @@
-// RUN: %clang -### -S -fcall-graph-section %s 2>&1 | FileCheck 
--check-prefix=CALL-GRAPH-SECTION %s
-// RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | 
FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s
+// RUN: %clang -### -fcall-graph-section %s 2>&1 | FileCheck 
--check-prefix=CALL-GRAPH-SECTION %s
+// RUN: %clang -### -fcall-graph-section -fno-call-graph-section %s 2>&1 | 
FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s
 
 // CALL-GRAPH-SECTION: "-fcall-graph-section"
 // NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section"

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Introduce CallGraphSection option (PR #117037)

2025-07-30 Thread Prabhu Rajasekaran via llvm-branch-commits

https://github.com/Prabhuk updated 
https://github.com/llvm/llvm-project/pull/117037

>From 6a12be2c5b60a95a06875b0b2c4f14228d1fa882 Mon Sep 17 00:00:00 2001
From: prabhukr 
Date: Wed, 12 Mar 2025 23:30:01 +0000
Subject: [PATCH 1/2] Fix EOF newlines.

Created using spr 1.3.6-beta.1
---
 clang/test/Driver/call-graph-section.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Driver/call-graph-section.c 
b/clang/test/Driver/call-graph-section.c
index 108446729d857..5832aa6754137 100644
--- a/clang/test/Driver/call-graph-section.c
+++ b/clang/test/Driver/call-graph-section.c
@@ -2,4 +2,4 @@
 // RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | 
FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s
 
 // CALL-GRAPH-SECTION: "-fcall-graph-section"
-// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section"
\ No newline at end of file
+// NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section"

>From c67f714eaab9a7f1e4d2d76da28641b05710231d Mon Sep 17 00:00:00 2001
From: prabhukr 
Date: Mon, 21 Jul 2025 23:53:52 +0000
Subject: [PATCH 2/2] Fix review comment on test file.

Created using spr 1.3.6-beta.1
---
 clang/test/Driver/call-graph-section.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Driver/call-graph-section.c 
b/clang/test/Driver/call-graph-section.c
index 5832aa6754137..563f36de4119e 100644
--- a/clang/test/Driver/call-graph-section.c
+++ b/clang/test/Driver/call-graph-section.c
@@ -1,5 +1,5 @@
-// RUN: %clang -### -S -fcall-graph-section %s 2>&1 | FileCheck 
--check-prefix=CALL-GRAPH-SECTION %s
-// RUN: %clang -### -S -fcall-graph-section -fno-call-graph-section %s 2>&1 | 
FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s
+// RUN: %clang -### -fcall-graph-section %s 2>&1 | FileCheck 
--check-prefix=CALL-GRAPH-SECTION %s
+// RUN: %clang -### -fcall-graph-section -fno-call-graph-section %s 2>&1 | 
FileCheck --check-prefix=NO-CALL-GRAPH-SECTION %s
 
 // CALL-GRAPH-SECTION: "-fcall-graph-section"
 // NO-CALL-GRAPH-SECTION-NOT: "-fcall-graph-section"

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] callee_type metadata for indirect calls (PR #117036)

2025-07-30 Thread Prabhu Rajasekaran via llvm-branch-commits

https://github.com/Prabhuk updated 
https://github.com/llvm/llvm-project/pull/117036

>From b7fbe09b32ff02d4f7c52d82fbf8b5cd28138852 Mon Sep 17 00:00:00 2001
From: prabhukr 
Date: Wed, 23 Apr 2025 04:05:47 +0000
Subject: [PATCH] Address review comments.

Created using spr 1.3.6-beta.1
---
 clang/lib/CodeGen/CGCall.cpp|  8 
 clang/lib/CodeGen/CodeGenModule.cpp | 10 +-
 clang/lib/CodeGen/CodeGenModule.h   |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 185ee1a970aac..d8ab7140f7943 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5780,19 +5780,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
   if (callOrInvoke) {
 *callOrInvoke = CI;
 if (CGM.getCodeGenOpts().CallGraphSection) {
-  assert((TargetDecl && TargetDecl->getFunctionType() ||
-  Callee.getAbstractInfo().getCalleeFunctionProtoType()) &&
- "cannot find callsite type");
   QualType CST;
   if (TargetDecl && TargetDecl->getFunctionType())
 CST = QualType(TargetDecl->getFunctionType(), 0);
   else if (const auto *FPT =
Callee.getAbstractInfo().getCalleeFunctionProtoType())
 CST = QualType(FPT, 0);
+  else
+llvm_unreachable(
+"Cannot find the callee type to generate callee_type metadata.");
 
   // Set type identifier metadata of indirect calls for call graph section.
   if (!CST.isNull())
-CGM.CreateCalleeTypeMetadataForIcall(CST, *callOrInvoke);
+CGM.createCalleeTypeMetadataForIcall(CST, *callOrInvoke);
 }
   }
 
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 43cd2405571cf..2fc99639a75cb 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2654,7 +2654,7 @@ void 
CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
   // Skip available_externally functions. They won't be codegen'ed in the
   // current module anyway.
   if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally)
-CreateFunctionTypeMetadataForIcall(FD, F);
+createFunctionTypeMetadataForIcall(FD, F);
 }
   }
 
@@ -2868,7 +2868,7 @@ static bool hasExistingGeneralizedTypeMD(llvm::Function 
*F) {
   return MD->hasGeneralizedMDString();
 }
 
-void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
+void CodeGenModule::createFunctionTypeMetadataForIcall(const FunctionDecl *FD,
llvm::Function *F) {
   if (CodeGenOpts.CallGraphSection && !hasExistingGeneralizedTypeMD(F) &&
   (!F->hasLocalLinkage() ||
@@ -2898,7 +2898,7 @@ void 
CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
   F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId));
 }
 
-void CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT,
+void CodeGenModule::createCalleeTypeMetadataForIcall(const QualType &QT,
  llvm::CallBase *CB) {
   // Only if needed for call graph section and only for indirect calls.
   if (!CodeGenOpts.CallGraphSection || !CB->isIndirectCall())
@@ -2909,7 +2909,7 @@ void 
CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT,
   getLLVMContext(), {llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
  llvm::Type::getInt64Ty(getLLVMContext()), 0)),
  TypeIdMD});
-  llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), { TypeTuple });
+  llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), {TypeTuple});
   CB->setMetadata(llvm::LLVMContext::MD_callee_type, MDN);
 }
 
@@ -3041,7 +3041,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, 
llvm::Function *F,
   // jump table.
   if (!CodeGenOpts.SanitizeCfiCrossDso ||
   !CodeGenOpts.SanitizeCfiCanonicalJumpTables)
-CreateFunctionTypeMetadataForIcall(FD, F);
+createFunctionTypeMetadataForIcall(FD, F);
 
   if (LangOpts.Sanitize.has(SanitizerKind::KCFI))
 setKCFIType(FD, F);
diff --git a/clang/lib/CodeGen/CodeGenModule.h 
b/clang/lib/CodeGen/CodeGenModule.h
index dfbe4388349dd..4b53f0f241b52 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1619,11 +1619,11 @@ class CodeGenModule : public CodeGenTypeCache {
   llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T);
 
   /// Create and attach type metadata to the given function.
-  void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
+  void createFunctionTypeMetadataForIcall(const FunctionDecl *FD,
   llvm::Function *F);
 
   /// Create and attach type metadata to the given call.
-  void CreateCalleeTypeMetadataForIcall(const QualType &QT, llvm::CallBase 
*CB);
+  void createCa

[llvm-branch-commits] [llvm] [llvm][AsmPrinter] Emit call graph section (PR #87576)

2025-07-30 Thread Prabhu Rajasekaran via llvm-branch-commits

https://github.com/Prabhuk updated 
https://github.com/llvm/llvm-project/pull/87576

>From 6b67376bd5e1f21606017c83cc67f2186ba36a33 Mon Sep 17 00:00:00 2001
From: Necip Fazil Yildiran 
Date: Thu, 13 Mar 2025 01:41:04 +0000
Subject: [PATCH 1/6] Updated the test as reviewers suggested.

Created using spr 1.3.6-beta.1
---
 llvm/test/CodeGen/X86/call-graph-section.ll | 66 +++
 llvm/test/CodeGen/call-graph-section.ll | 73 -
 2 files changed, 66 insertions(+), 73 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/call-graph-section.ll
 delete mode 100644 llvm/test/CodeGen/call-graph-section.ll

diff --git a/llvm/test/CodeGen/X86/call-graph-section.ll 
b/llvm/test/CodeGen/X86/call-graph-section.ll
new file mode 100644
index 0000000000000..a77a2b8051ed3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/call-graph-section.ll
@@ -0,0 +1,66 @@
+;; Tests that we store the type identifiers in .callgraph section of the 
binary.
+
+; RUN: llc --call-graph-section -filetype=obj -o - < %s | \
+; RUN: llvm-readelf -x .callgraph - | FileCheck %s
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @foo() #0 !type !4 {
+entry:
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @bar(i8 signext %a) #0 !type !5 {
+entry:
+  %a.addr = alloca i8, align 1
+  store i8 %a, ptr %a.addr, align 1
+  ret i32 0
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local ptr @baz(ptr %a) #0 !type !6 {
+entry:
+  %a.addr = alloca ptr, align 8
+  store ptr %a, ptr %a.addr, align 8
+  ret ptr null
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @main() #0 !type !7 {
+entry:
+  %retval = alloca i32, align 4
+  %fp_foo = alloca ptr, align 8
+  %a = alloca i8, align 1
+  %fp_bar = alloca ptr, align 8
+  %fp_baz = alloca ptr, align 8
+  store i32 0, ptr %retval, align 4
+  store ptr @foo, ptr %fp_foo, align 8
+  %0 = load ptr, ptr %fp_foo, align 8
+  call void (...) %0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
+  store ptr @bar, ptr %fp_bar, align 8
+  %1 = load ptr, ptr %fp_bar, align 8
+  %2 = load i8, ptr %a, align 1
+  %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata 
!"_ZTSFicE.generalized") ]
+  store ptr @baz, ptr %fp_baz, align 8
+  %3 = load ptr, ptr %fp_baz, align 8
+  %call1 = call ptr %3(ptr %a) [ "callee_type"(metadata 
!"_ZTSFPvS_E.generalized") ]
+  call void @foo() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
+  %4 = load i8, ptr %a, align 1
+  %call2 = call i32 @bar(i8 signext %4) [ "callee_type"(metadata 
!"_ZTSFicE.generalized") ]
+  %call3 = call ptr @baz(ptr %a) [ "callee_type"(metadata 
!"_ZTSFPvS_E.generalized") ]
+  ret void
+}
+
+;; Check that the numeric type id (md5 hash) for the below type ids are emitted
+;; to the callgraph section.
+
+; CHECK: Hex dump of section '.callgraph':
+
+; CHECK-DAG: 2444f731 f5eecb3e
+!4 = !{i64 0, !"_ZTSFvE.generalized"}
+; CHECK-DAG: 5486bc59 814b8e30
+!5 = !{i64 0, !"_ZTSFicE.generalized"}
+; CHECK-DAG: 7ade6814 f897fd77
+!6 = !{i64 0, !"_ZTSFPvS_E.generalized"}
+; CHECK-DAG: caaf769a 600968fa
+!7 = !{i64 0, !"_ZTSFiE.generalized"}
diff --git a/llvm/test/CodeGen/call-graph-section.ll 
b/llvm/test/CodeGen/call-graph-section.ll
deleted file mode 100644
index bb158d11e82c9..0000000000000
--- a/llvm/test/CodeGen/call-graph-section.ll
+++ /dev/null
@@ -1,73 +0,0 @@
-; Tests that we store the type identifiers in .callgraph section of the binary.
-
-; RUN: llc --call-graph-section -filetype=obj -o - < %s | \
-; RUN: llvm-readelf -x .callgraph - | FileCheck %s
-
-target triple = "x86_64-unknown-linux-gnu"
-
-define dso_local void @foo() #0 !type !4 {
-entry:
-  ret void
-}
-
-define dso_local i32 @bar(i8 signext %a) #0 !type !5 {
-entry:
-  %a.addr = alloca i8, align 1
-  store i8 %a, i8* %a.addr, align 1
-  ret i32 0
-}
-
-define dso_local i32* @baz(i8* %a) #0 !type !6 {
-entry:
-  %a.addr = alloca i8*, align 8
-  store i8* %a, i8** %a.addr, align 8
-  ret i32* null
-}
-
-define dso_local i32 @main() #0 !type !7 {
-entry:
-  %retval = alloca i32, align 4
-  %fp_foo = alloca void (...)*, align 8
-  %a = alloca i8, align 1
-  %fp_bar = alloca i32 (i8)*, align 8
-  %fp_baz = alloca i32* (i8*)*, align 8
-  store i32 0, i32* %retval, align 4
-  store void (...)* bitcast (void ()* @foo to void (...)*), void (...)** 
%fp_foo, align 8
-  %0 = load void (...)*, void (...)** %fp_foo, align 8
-  call void (...) %0() [ "callee_type"(metadata !"_ZTSFvE.generalized") ]
-  store i32 (i8)* @bar, i32 (i8)** %fp_bar, align 8
-  %1 = load i32 (i8)*, i32 (i8)** %fp_bar, align 8
-  %2 = load i8, i8* %a, align 1
-  %call = call i32 %1(i8 signext %2) [ "callee_type"(metadata 
!"_ZTSFicE.generalized") ]
-  store i32* (i8*)* @baz, i32* (i8*)** %fp_baz, align 8
-  %3 = load i32* (i8*)*, i32* (i8*)** %fp_baz, align 8
-  %call1 = call i32* %3(i8* %a) [ "callee_type"(metadata 
!"_ZTSFPvS_E.generalized") ]
-  call void @foo() [ "callee_type"(meta

[llvm-branch-commits] callgraph make flag experimental (PR #151402)

2025-07-30 Thread Prabhu Rajasekaran via llvm-branch-commits

https://github.com/Prabhuk created 
https://github.com/llvm/llvm-project/pull/151402

None


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] callee_type metadata for indirect calls (PR #117036)

2025-07-30 Thread Prabhu Rajasekaran via llvm-branch-commits

https://github.com/Prabhuk updated 
https://github.com/llvm/llvm-project/pull/117036

>From b7fbe09b32ff02d4f7c52d82fbf8b5cd28138852 Mon Sep 17 00:00:00 2001
From: prabhukr 
Date: Wed, 23 Apr 2025 04:05:47 +0000
Subject: [PATCH] Address review comments.

Created using spr 1.3.6-beta.1
---
 clang/lib/CodeGen/CGCall.cpp|  8 
 clang/lib/CodeGen/CodeGenModule.cpp | 10 +-
 clang/lib/CodeGen/CodeGenModule.h   |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 185ee1a970aac..d8ab7140f7943 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5780,19 +5780,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
   if (callOrInvoke) {
 *callOrInvoke = CI;
 if (CGM.getCodeGenOpts().CallGraphSection) {
-  assert((TargetDecl && TargetDecl->getFunctionType() ||
-  Callee.getAbstractInfo().getCalleeFunctionProtoType()) &&
- "cannot find callsite type");
   QualType CST;
   if (TargetDecl && TargetDecl->getFunctionType())
 CST = QualType(TargetDecl->getFunctionType(), 0);
   else if (const auto *FPT =
Callee.getAbstractInfo().getCalleeFunctionProtoType())
 CST = QualType(FPT, 0);
+  else
+llvm_unreachable(
+"Cannot find the callee type to generate callee_type metadata.");
 
   // Set type identifier metadata of indirect calls for call graph section.
   if (!CST.isNull())
-CGM.CreateCalleeTypeMetadataForIcall(CST, *callOrInvoke);
+CGM.createCalleeTypeMetadataForIcall(CST, *callOrInvoke);
 }
   }
 
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 43cd2405571cf..2fc99639a75cb 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2654,7 +2654,7 @@ void 
CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
   // Skip available_externally functions. They won't be codegen'ed in the
   // current module anyway.
   if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally)
-CreateFunctionTypeMetadataForIcall(FD, F);
+createFunctionTypeMetadataForIcall(FD, F);
 }
   }
 
@@ -2868,7 +2868,7 @@ static bool hasExistingGeneralizedTypeMD(llvm::Function 
*F) {
   return MD->hasGeneralizedMDString();
 }
 
-void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
+void CodeGenModule::createFunctionTypeMetadataForIcall(const FunctionDecl *FD,
llvm::Function *F) {
   if (CodeGenOpts.CallGraphSection && !hasExistingGeneralizedTypeMD(F) &&
   (!F->hasLocalLinkage() ||
@@ -2898,7 +2898,7 @@ void 
CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
   F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId));
 }
 
-void CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT,
+void CodeGenModule::createCalleeTypeMetadataForIcall(const QualType &QT,
  llvm::CallBase *CB) {
   // Only if needed for call graph section and only for indirect calls.
   if (!CodeGenOpts.CallGraphSection || !CB->isIndirectCall())
@@ -2909,7 +2909,7 @@ void 
CodeGenModule::CreateCalleeTypeMetadataForIcall(const QualType &QT,
   getLLVMContext(), {llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
  llvm::Type::getInt64Ty(getLLVMContext()), 0)),
  TypeIdMD});
-  llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), { TypeTuple });
+  llvm::MDTuple *MDN = llvm::MDNode::get(getLLVMContext(), {TypeTuple});
   CB->setMetadata(llvm::LLVMContext::MD_callee_type, MDN);
 }
 
@@ -3041,7 +3041,7 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, 
llvm::Function *F,
   // jump table.
   if (!CodeGenOpts.SanitizeCfiCrossDso ||
   !CodeGenOpts.SanitizeCfiCanonicalJumpTables)
-CreateFunctionTypeMetadataForIcall(FD, F);
+createFunctionTypeMetadataForIcall(FD, F);
 
   if (LangOpts.Sanitize.has(SanitizerKind::KCFI))
 setKCFIType(FD, F);
diff --git a/clang/lib/CodeGen/CodeGenModule.h 
b/clang/lib/CodeGen/CodeGenModule.h
index dfbe4388349dd..4b53f0f241b52 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1619,11 +1619,11 @@ class CodeGenModule : public CodeGenTypeCache {
   llvm::Metadata *CreateMetadataIdentifierGeneralized(QualType T);
 
   /// Create and attach type metadata to the given function.
-  void CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD,
+  void createFunctionTypeMetadataForIcall(const FunctionDecl *FD,
   llvm::Function *F);
 
   /// Create and attach type metadata to the given call.
-  void CreateCalleeTypeMetadataForIcall(const QualType &QT, llvm::CallBase 
*CB);
+  void createCa

[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add nounwind to hexagon-strcpy.ll (#151293) (PR #151458)

2025-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/151458

Backport 3796efb

Requested by: @svs-quic

>From 96ebfde394f93570db6817e096d3fa95a38aa2d6 Mon Sep 17 00:00:00 2001
From: Sudharsan Veeravalli 
Date: Thu, 31 Jul 2025 11:13:55 +0530
Subject: [PATCH] [Hexagon] Add nounwind to hexagon-strcpy.ll (#151293)

The test does not check for anything related to cfi information so we
don't really need them in the test checks. Also it looks like there were
some failures on the Alpine Linux builders due to the placement of the
cfi information in the output assembly.

I have also changed `-march` to `-mtriple` in the run line similar to
2208c97

(cherry picked from commit 3796efb5dc08d4596aa986bd03a1290c43e2e995)
---
 llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll 
b/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
index b23366bc11aca..f5430dfea5865 100644
--- a/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
+++ b/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
@@ -1,20 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
-; RUN: llc -march=hexagon -verify-machineinstrs  < %s | FileCheck %s
+; RUN: llc -mtriple=hexagon -verify-machineinstrs  < %s | FileCheck %s
 
 @.str = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM, 3'RD 
STRING\00", align 1
 @.str1 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
 
-; Function Attrs: nounwind
 declare i32 @printf(i8* nocapture readonly, ...)
 
 ; Function Attrs: nounwind
-define i32 @main() {
+define i32 @main() nounwind {
 ; CHECK-LABEL: main:
-; CHECK: .cfi_startproc
-; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:.cfi_def_cfa r30, 8
-; CHECK-NEXT:.cfi_offset r31, -4
-; CHECK-NEXT:.cfi_offset r30, -8
+; CHECK:   // %bb.0: // %entry
 ; CHECK-NEXT:{
 ; CHECK-NEXT: r0 = ##.L.str1
 ; CHECK-NEXT: r3:2 = CONST64(#2325073635944967245)
@@ -53,5 +48,4 @@ entry:
   ret i32 0
 }
 
-; Function Attrs: nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, 
i32, i32, i1)

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add nounwind to hexagon-strcpy.ll (#151293) (PR #151458)

2025-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/151458
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add nounwind to hexagon-strcpy.ll (#151293) (PR #151458)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:

@androm3da What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/151458
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [Hexagon] Add nounwind to hexagon-strcpy.ll (#151293) (PR #151458)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-hexagon

Author: None (llvmbot)


Changes

Backport 3796efb

Requested by: @svs-quic

---
Full diff: https://github.com/llvm/llvm-project/pull/151458.diff


1 Files Affected:

- (modified) llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll (+3-9) 


```diff
diff --git a/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll 
b/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
index b23366bc11aca..f5430dfea5865 100644
--- a/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
+++ b/llvm/test/CodeGen/Hexagon/hexagon-strcpy.ll
@@ -1,20 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
-; RUN: llc -march=hexagon -verify-machineinstrs  < %s | FileCheck %s
+; RUN: llc -mtriple=hexagon -verify-machineinstrs  < %s | FileCheck %s
 
 @.str = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM, 3'RD 
STRING\00", align 1
 @.str1 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
 
-; Function Attrs: nounwind
 declare i32 @printf(i8* nocapture readonly, ...)
 
 ; Function Attrs: nounwind
-define i32 @main() {
+define i32 @main() nounwind {
 ; CHECK-LABEL: main:
-; CHECK: .cfi_startproc
-; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:.cfi_def_cfa r30, 8
-; CHECK-NEXT:.cfi_offset r31, -4
-; CHECK-NEXT:.cfi_offset r30, -8
+; CHECK:   // %bb.0: // %entry
 ; CHECK-NEXT:{
 ; CHECK-NEXT: r0 = ##.L.str1
 ; CHECK-NEXT: r3:2 = CONST64(#2325073635944967245)
@@ -53,5 +48,4 @@ entry:
   ret i32 0
 }
 
-; Function Attrs: nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, 
i32, i32, i1)

```




https://github.com/llvm/llvm-project/pull/151458
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Figure out required AGPR count for inline asm (PR #150910)

2025-07-30 Thread Fabian Ritter via llvm-branch-commits


@@ -1200,16 +1200,61 @@ AAAMDWavesPerEU 
&AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
   llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
 }
 
-static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
-  for (const auto &CI : IA->ParseConstraints()) {
+/// Compute the minimum number of AGPRs required to allocate the inline asm.
+static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
+ const CallBase &Call) {
+  unsigned ArgNo = 0;
+  unsigned ResNo = 0;
+  unsigned AGPRDefCount = 0;
+  unsigned AGPRUseCount = 0;
+  unsigned MaxPhysReg = 0;
+  const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
+
+  for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+Type *Ty = nullptr;
+switch (CI.Type) {
+case InlineAsm::isOutput: {
+  Ty = Call.getType();
+  if (auto *STy = dyn_cast<StructType>(Ty))
+Ty = STy->getElementType(ResNo);
+  ++ResNo;
+  break;
+}
+case InlineAsm::isInput: {
+  Ty = Call.getArgOperand(ArgNo++)->getType();
+  break;
+}
+case InlineAsm::isLabel:
+  continue;
+case InlineAsm::isClobber:
+  // Parse the physical register reference.
+  break;
+}
+
 for (StringRef Code : CI.Codes) {
-  Code.consume_front("{");
-  if (Code.starts_with("a"))
-return true;
+  if (Code.starts_with("a")) {
+// Virtual register, compute number of registers based on the type.
+//
+// We ought to be going through TargetLowering to get the number of
+// registers, but we should avoid the dependence on CodeGen here.
+unsigned RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
+if (CI.Type == InlineAsm::isOutput) {
+  AGPRDefCount += RegCount;
+  if (CI.isEarlyClobber)
+AGPRUseCount += RegCount;
+} else
+  AGPRUseCount += RegCount;
+  } else {
+// Physical register reference
+auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
+if (Kind == 'a')
+  MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
+  }
 }
   }
 
-  return false;
+  unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
+  return std::min(MaxVirtReg + MaxPhysReg, 256u);

ritter-x2a wrote:

For this code
```
define amdgpu_kernel void @foo() {
  call void asm sideeffect "; use $0, $1, $2", "{a16},a,a"(i32 17, <8 x i32> 
splat (i32 1), <16 x i32> splat (i32 2))
  ret void
}
```
we allocate `; use a16, a[18:25], a[0:15]`, so the asm uses 25 AGPRs (arguably 
26 since `a25` is used and `a17` is left out, not sure why it's not allocated 
as `a[17:24]`, I'm not aware of alignment requirements for AGPRs).
This function computes 17 (the highest required physical register index + 1) + 
24 (the number of virtual registers required) = 41 AGPRs required.
This over-approximation seems worth pointing out in a comment, if it's intended.

https://github.com/llvm/llvm-project/pull/150910
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)

2025-07-30 Thread via llvm-branch-commits

https://github.com/easyonaadit updated 
https://github.com/llvm/llvm-project/pull/150170

>From 2d22d224d27438d8d9d0979a5fd937653a1cb8af Mon Sep 17 00:00:00 2001
From: Aaditya 
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def |  25 ++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp  |  58 +++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl  | 378 +++
 3 files changed, 461 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 878543566f0e3..b91b32457ff86 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
 BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
 BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
 
+//===----------------------------------------------------------------------===//
+
+// Wave Reduction builtins.
+
+//===----------------------------------------------------------------------===//
+
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
+
 
//===--===//
 // R600-NI only builtins.
 
//===--===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp 
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 7dccf82b1a7a3..c3d9ec5fc8309 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void 
CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
   Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
 }
 
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+llvm_unreachable("Unknown BuiltinID for wave reduction");
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+return Intrinsic::amdgcn_wave_reduce_add;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+return Intrinsic::amdgcn_wave_reduce_sub;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+return Intrinsic::amdgcn_wave_reduce_min;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+return Intrinsic::amdgcn_wave_reduce_umin;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+return Intrinsic::amdgcn_wave_reduce_max;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+return Intrinsic::amdgcn_wave_reduce_umax;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+return Intrinsic::amdgcn_wave_reduce_and;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+return Intrinsic::amdgcn_wave_reduce_or;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+return Intrinsic::amdgcn_wave_reduce_xor;
+  }
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
   llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u

[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #150170)

2025-07-30 Thread via llvm-branch-commits

https://github.com/easyonaadit updated 
https://github.com/llvm/llvm-project/pull/150170

>From 2d22d224d27438d8d9d0979a5fd937653a1cb8af Mon Sep 17 00:00:00 2001
From: Aaditya 
Date: Sat, 19 Jul 2025 12:57:27 +0530
Subject: [PATCH] Add builtins for wave reduction intrinsics

---
 clang/include/clang/Basic/BuiltinsAMDGPU.def |  25 ++
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp  |  58 +++
 clang/test/CodeGenOpenCL/builtins-amdgcn.cl  | 378 +++
 3 files changed, 461 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 878543566f0e3..b91b32457ff86 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
 BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n")
 BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n")
 
+//===--===//
+
+// Wave Reduction builtins.
+
+//===--===//
+
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_add_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_sub_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc")
+BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc")
+
 
//===--===//
 // R600-NI only builtins.
 
//===--===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp 
b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 7dccf82b1a7a3..c3d9ec5fc8309 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -295,11 +295,69 @@ void 
CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
   Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));
 }
 
+static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+llvm_unreachable("Unknown BuiltinID for wave reduction");
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
+return Intrinsic::amdgcn_wave_reduce_add;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
+return Intrinsic::amdgcn_wave_reduce_sub;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
+return Intrinsic::amdgcn_wave_reduce_min;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
+return Intrinsic::amdgcn_wave_reduce_umin;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
+return Intrinsic::amdgcn_wave_reduce_max;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
+return Intrinsic::amdgcn_wave_reduce_umax;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
+return Intrinsic::amdgcn_wave_reduce_and;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
+return Intrinsic::amdgcn_wave_reduce_or;
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
+  case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
+return Intrinsic::amdgcn_wave_reduce_xor;
+  }
+}
+
 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   const CallExpr *E) {
   llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
   llvm::SyncScope::ID SSID;
   switch (BuiltinID) {
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
+  case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u

[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 2 (PR #151309)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Aaditya (easyonaadit)


Changes

Supporting Arithmetic Operations: `add`, `sub`

---

Patch is 168.45 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/151309.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+130-25) 
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+2) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll (+1356) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll (+1663-48) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0f529ef362199..56d8e739b6493 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5107,7 +5107,9 @@ static uint32_t getIdentityValueForWaveReduction(unsigned 
Opc) {
   case AMDGPU::V_CMP_GT_I64_e64: // max.i64
 return std::numeric_limits::min();
   case AMDGPU::S_ADD_I32:
+  case AMDGPU::S_ADD_U64_PSEUDO:
   case AMDGPU::S_SUB_I32:
+  case AMDGPU::S_SUB_U64_PSEUDO:
   case AMDGPU::S_OR_B32:
   case AMDGPU::S_XOR_B32:
 return std::numeric_limits::min();
@@ -5153,11 +5155,14 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr 
&MI,
 }
 case AMDGPU::S_XOR_B32:
 case AMDGPU::S_ADD_I32:
-case AMDGPU::S_SUB_I32: {
+case AMDGPU::S_ADD_U64_PSEUDO:
+case AMDGPU::S_SUB_I32:
+case AMDGPU::S_SUB_U64_PSEUDO: {
   const TargetRegisterClass *WaveMaskRegClass = TRI->getWaveMaskRegClass();
   const TargetRegisterClass *DstRegClass = MRI.getRegClass(DstReg);
   Register ExecMask = MRI.createVirtualRegister(WaveMaskRegClass);
-  Register ActiveLanes = MRI.createVirtualRegister(DstRegClass);
+  Register ActiveLanes =
+  MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
   bool IsWave32 = ST.isWave32();
   unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
@@ -5165,39 +5170,39 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr 
&MI,
   unsigned CountReg =
   IsWave32 ? AMDGPU::S_BCNT1_I32_B32 : AMDGPU::S_BCNT1_I32_B64;
 
-  auto Exec =
   BuildMI(BB, MI, DL, TII->get(MovOpc), ExecMask).addReg(ExecReg);
 
-  auto NewAccumulator = BuildMI(BB, MI, DL, TII->get(CountReg), 
ActiveLanes)
-.addReg(Exec->getOperand(0).getReg());
+  auto NewAccumulator =
+  BuildMI(BB, MI, DL, TII->get(CountReg), ActiveLanes)
+  .addReg(ExecMask);
+
+  switch (Opc) {
+  case AMDGPU::S_XOR_B32: {
+// Performing an XOR operation on a uniform value
+// depends on the parity of the number of active lanes.
+// For even parity, the result will be 0, for odd
+// parity the result will be the same as the input value.
+Register ParityRegister =
+MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
 
-  switch (Opc) {
-  case AMDGPU::S_XOR_B32: {
-// Performing an XOR operation on a uniform value
-// depends on the parity of the number of active lanes.
-// For even parity, the result will be 0, for odd
-// parity the result will be the same as the input value.
-Register ParityRegister = MRI.createVirtualRegister(DstRegClass);
-
-auto ParityReg =
 BuildMI(BB, MI, DL, TII->get(AMDGPU::S_AND_B32), ParityRegister)
 .addReg(NewAccumulator->getOperand(0).getReg())
-.addImm(1);
-BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
-.addReg(SrcReg)
-.addReg(ParityReg->getOperand(0).getReg());
-break;
-  }
+.addImm(1)
+.setOperandDead(3); // Dead scc
+BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
+.addReg(SrcReg)
+.addReg(ParityRegister);
+break;
+  }
   case AMDGPU::S_SUB_I32: {
 Register NegatedVal = MRI.createVirtualRegister(DstRegClass);
 
 // Take the negation of the source operand.
-auto InvertedValReg =
-BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), NegatedVal)
-.addImm(-1)
-.addReg(SrcReg);
+BuildMI(BB, MI, DL, TII->get(AMDGPU::S_SUB_I32), NegatedVal)
+.addImm(0)
+.addReg(SrcReg);
 BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
-.addReg(InvertedValReg->getOperand(0).getReg())
+.addReg(NegatedVal)
 .addReg(NewAccumulator->getOperand(0).getReg());
 break;
   }
@@ -5207,6 +5212,74 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr 
&MI,
 .addReg(NewAccumulator->getOperand(0).getReg());
 break;
   }
+  case AMDGPU::S_ADD_U64_PSEUDO:
+  case AMDGPU::S_SUB_U64_PSEUDO: {
+Register Des

[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 3 (PR #151310)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Aaditya (easyonaadit)


Changes

Supporting Bitwise Operations: `and`, `or`, `xor`

---

Patch is 146.75 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/151310.diff


5 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+72-6) 
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+3) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll (+854) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll (+855) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll (+1413) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 56d8e739b6493..c8a0372aa0f8a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5111,9 +5111,12 @@ static uint32_t 
getIdentityValueForWaveReduction(unsigned Opc) {
   case AMDGPU::S_SUB_I32:
   case AMDGPU::S_SUB_U64_PSEUDO:
   case AMDGPU::S_OR_B32:
+  case AMDGPU::S_OR_B64:
   case AMDGPU::S_XOR_B32:
+  case AMDGPU::S_XOR_B64:
 return std::numeric_limits::min();
   case AMDGPU::S_AND_B32:
+  case AMDGPU::S_AND_B64:
 return std::numeric_limits::max();
   default:
 llvm_unreachable("Unexpected opcode in getIdentityValueForWaveReduction");
@@ -5146,7 +5149,9 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr 
&MI,
 case AMDGPU::S_MAX_I32:
 case AMDGPU::V_CMP_GT_I64_e64: /*max*/
 case AMDGPU::S_AND_B32:
-case AMDGPU::S_OR_B32: {
+case AMDGPU::S_AND_B64:
+case AMDGPU::S_OR_B32:
+case AMDGPU::S_OR_B64: {
   // Idempotent operations.
   unsigned movOpc = is32BitOpc ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
   BuildMI(BB, MI, DL, TII->get(movOpc), DstReg).addReg(SrcReg);
@@ -5154,6 +5159,7 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr 
&MI,
   break;
 }
 case AMDGPU::S_XOR_B32:
+case AMDGPU::S_XOR_B64:
 case AMDGPU::S_ADD_I32:
 case AMDGPU::S_ADD_U64_PSEUDO:
 case AMDGPU::S_SUB_I32:
@@ -5177,7 +5183,8 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr 
&MI,
   .addReg(ExecMask);
 
   switch (Opc) {
-  case AMDGPU::S_XOR_B32: {
+  case AMDGPU::S_XOR_B32:
+  case AMDGPU::S_XOR_B64: {
 // Performing an XOR operation on a uniform value
 // depends on the parity of the number of active lanes.
 // For even parity, the result will be 0, for odd
@@ -5189,10 +5196,54 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr 
&MI,
 .addReg(NewAccumulator->getOperand(0).getReg())
 .addImm(1)
 .setOperandDead(3); // Dead scc
-BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
-.addReg(SrcReg)
-.addReg(ParityRegister);
-break;
+if (is32BitOpc) {
+  BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DstReg)
+  .addReg(SrcReg)
+  .addReg(ParityRegister);
+  break;
+} else {
+  Register DestSub0 =
+  MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register DestSub1 =
+  MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register Op1H_Op0L_Reg =
+  MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  Register CarryReg =
+  MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+  const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+  const TargetRegisterClass *SrcSubRC =
+  TRI->getSubRegisterClass(SrcRC, AMDGPU::sub0);
+
+  MachineOperand Op1L = TII->buildExtractSubRegOrImm(
+  MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub0, SrcSubRC);
+  MachineOperand Op1H = TII->buildExtractSubRegOrImm(
+  MI, MRI, MI.getOperand(1), SrcRC, AMDGPU::sub1, SrcSubRC);
+
+  BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), DestSub0)
+  .add(Op1L)
+  .addReg(ParityRegister);
+
+  BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_I32), Op1H_Op0L_Reg)
+  .add(Op1H)
+  .addReg(ParityRegister);
+
+  BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MUL_HI_U32), CarryReg)
+  .add(Op1L)
+  .addReg(ParityRegister);
+
+  BuildMI(BB, MI, DL, TII->get(AMDGPU::S_ADD_U32), DestSub1)
+  .addReg(CarryReg)
+  .addReg(Op1H_Op0L_Reg)
+  .setOperandDead(3); // Dead scc
+
+  BuildMI(BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), DstReg)
+  .addReg(DestSub0)
+  .addImm(AMDGPU::sub0)
+  .addReg(DestSub1)
+  .addImm(AMDGPU

[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 3 (PR #151310)

2025-07-30 Thread via llvm-branch-commits

https://github.com/easyonaadit ready_for_review 
https://github.com/llvm/llvm-project/pull/151310
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Extending wave reduction intrinsics for `i64` types - 2 (PR #151309)

2025-07-30 Thread via llvm-branch-commits

https://github.com/easyonaadit ready_for_review 
https://github.com/llvm/llvm-project/pull/151309
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Enable scalable vectorization of linalg.unpack (PR #149293)

2025-07-30 Thread Andrzej Warzyński via llvm-branch-commits

banach-space wrote:

**UPDATE: 30/7/25**

* This 
[commit](https://github.com/llvm/llvm-project/pull/149293/commits/56108b1df69e150c475adc58880ca7dce5355b21)
 addresses the remaining comments from @hanhanW . 
* I have rebased this PR on top of 
https://github.com/llvm/llvm-project/pull/151334. This rebase addresses this 
[comment](https://github.com/llvm/llvm-project/pull/149293#discussion_r2237499014)
 from @egebeysel .

**GENERAL OBSERVATIONS + FUTURE STEPS**

Having implemented #151334, I now realise that we don't require separate vector 
sizes for the _write_ operation (there's a small twist though).

To illustrate, take this example:
```mlir
func.func @example(%source: tensor<8x4x16x16xf32>, %dest: tensor<64x127xf32>) 
-> tensor<64x127xf32> {
   %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] 
inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32>
   return %0 : tensor<64x127xf32>
 }
```

It will be vectorized as:
```mlir
  func.func @example(%arg0: tensor<8x4x16x16xf32>, %arg1: tensor<64x127xf32>) 
-> tensor<64x127xf32> {
%cst = arith.constant 0.00e+00 : f32
%c0 = arith.constant 0 : index
// This is key - vec Op 1 !!!
%0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %cst {in_bounds = 
[true, true, true, true]} : tensor<8x4x16x16xf32>, vector<8x4x16x16xf32>
// This is key - vec Op 2 !!!
%1 = vector.transpose %0, [1, 2, 0, 3] : vector<8x4x16x16xf32> to 
vector<4x16x8x16xf32>
// This is key - vec Op 3 !!!
%2 = vector.shape_cast %1 : vector<4x16x8x16xf32> to vector<64x128xf32>
%c0_0 = arith.constant 0 : index
// This is key - vec Op 4!!!
%3 = vector.transfer_write %2, %arg1[%c0_0, %c0_0] {in_bounds = [true, 
false]} : vector<64x128xf32>, tensor<64x127xf32>
return %3 : tensor<64x127xf32>
  }
```

Now, once we vectorize the read operation, the remaining sizes are already 
pre-determined (i.e. the sizes for the _write_ operation):
* For `vector.transpose`, the sizes must match the sizes from 
`vector.transfer_read` (modulo the permutation).
* For `vector.shape_cast`, the input must match the output of 
`vector.transpose`. The output is uniquely determined by e.g. applying 
`outer_dims_perm` from `linalg.unpack` to the output from `vector.transpose`.
* For `vector.transfer_write`, we have to use the output shape from 
`vector.shape_cast`.

TL;DR: We should only require sizes for the _read_ operation.

**TWIST**

While we should be able to infer the scalable flags, there is some logic still 
missing. This should not be a problem though.

**NEXT STEPS**

While we could land this as is (IREE integration looks fine: 
https://github.com/iree-org/iree/pull/21514, thanks @hanhanW ) and then iterate 
in-tree, it might be "healthier" if there's one self-contained change. 

Let me refine this and then integrate into IREE (to make sure that the 
integration works). Also, @hanhanW , let's sync offline and make sure that 
switching to "only vector sizes for the read Op" is going to work for IREE.

WDYT?

https://github.com/llvm/llvm-project/pull/149293
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Enable scalable vectorization of linalg.unpack (PR #149293)

2025-07-30 Thread Andrzej Warzyński via llvm-branch-commits

https://github.com/banach-space edited 
https://github.com/llvm/llvm-project/pull/149293
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy] [COFF] Ignore associative sections in executables (#151143) (PR #151336)

2025-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/151336

Backport fcbbcffd2e6ea30097809ba0cd1e3b6003fa862f

Requested by: @mstorsjo

>From b2b1c3d83951ebe6665314f1d10bb38077d01912 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= 
Date: Wed, 30 Jul 2025 15:39:04 +0200
Subject: [PATCH] [llvm-objcopy] [COFF] Ignore associative sections in
 executables (#151143)

COFF associative sections are a feature where relocatable object files
can have section snippets marked as related to another section snippet,
so they are kept or discarded in relation to that other section snippet.

When llvm-objcopy removes sections, it also removes sections that are
marked as associative to the removed section (as the associative
sections otherwise would end up orphaned).

In a linked executable module (EXE or DLL), section associativity is
meaningless - thus, we should ignore those fields from the input.

After linking, GNU ld keeps the SectionDefinition auxiliary part of
symbols intact as it was in the source object file, which means that it
references section numbers in the source object files.

This fixes https://github.com/llvm/llvm-project/issues/53433.

(cherry picked from commit fcbbcffd2e6ea30097809ba0cd1e3b6003fa862f)
---
 llvm/lib/ObjCopy/COFF/COFFReader.cpp  |   2 +-
 .../llvm-objcopy/COFF/exe-bogus-assoc.test| 134 ++
 2 files changed, 135 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test

diff --git a/llvm/lib/ObjCopy/COFF/COFFReader.cpp 
b/llvm/lib/ObjCopy/COFF/COFFReader.cpp
index 62a71d41ded5f..9b55f76e58404 100644
--- a/llvm/lib/ObjCopy/COFF/COFFReader.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFReader.cpp
@@ -135,7 +135,7 @@ Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) 
const {
 // it is, find the target section unique id.
 const coff_aux_section_definition *SD = SymRef.getSectionDefinition();
 const coff_aux_weak_external *WE = SymRef.getWeakExternal();
-if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && !Obj.IsPE) {
   int32_t Index = SD->getNumber(IsBigObj);
   if (Index <= 0 || static_cast(Index - 1) >= Sections.size())
 return createStringError(object_error::parse_failed,
diff --git a/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test 
b/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
new file mode 100644
index 0..12f14b5d58e1c
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
@@ -0,0 +1,134 @@
+## Test that bogus associative section symbols in executables are ignored.
+##
+## The executable contains two (bogus) associative section symbols, both for
+## (parts of) the .rdata section; one pointing at the .debug_info section
+## (which will be stripped out) and one pointing at a nonexistent section.
+##
+## Check that stripping does succeed, and that it doesn't end up removing
+## the .rdata section.
+
+# RUN: yaml2obj %s -o %t.in.exe
+
+# RUN: llvm-strip --strip-debug %t.in.exe -o %t.out.exe
+# RUN: llvm-readobj --sections %t.out.exe | FileCheck %s
+
+# CHECK: Name: .rdata
+
+--- !COFF
+OptionalHeader:
+  AddressOfEntryPoint: 4096
+  ImageBase:   5368709120
+  SectionAlignment: 4096
+  FileAlignment:   512
+  MajorOperatingSystemVersion: 4
+  MinorOperatingSystemVersion: 0
+  MajorImageVersion: 0
+  MinorImageVersion: 0
+  MajorSubsystemVersion: 5
+  MinorSubsystemVersion: 2
+  Subsystem:   IMAGE_SUBSYSTEM_WINDOWS_CUI
+  DLLCharacteristics: [  ]
+  SizeOfStackReserve: 2097152
+  SizeOfStackCommit: 4096
+  SizeOfHeapReserve: 1048576
+  SizeOfHeapCommit: 4096
+header:
+  Machine: IMAGE_FILE_MACHINE_AMD64
+  Characteristics: [  ]
+sections:
+  - Name:.text
+Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, 
IMAGE_SCN_MEM_READ ]
+VirtualAddress:  4096
+VirtualSize: 48
+SectionData: 
E80600E80200C3C3C30F1F00
+SizeOfRawData:   512
+  - Name:.rdata
+Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+VirtualAddress:  8192
+VirtualSize: 4
+SectionData: ''
+SizeOfRawData:   512
+  - Name:.debug_info
+Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, 
IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ]
+VirtualAddress:  16384
+VirtualSize: 4
+SectionData: ''
+SizeOfRawData:   512
+symbols:
+  - Name:.text
+Value:   0
+SectionNumber:   1
+SimpleType:  IMAGE_SYM_TYPE_NULL
+ComplexType: IMAGE_SYM_DTYPE_NULL
+StorageClass:IMAGE_SYM_CLASS_STATIC
+SectionDefinition:
+  Length:  11
+  NumberOfRelocations: 2
+  NumberOfLinenumbers: 0
+  CheckSum:1703692295
+  Number:  1
+  - Name:'.te

[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy] [COFF] Ignore associative sections in executables (#151143) (PR #151336)

2025-07-30 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/151336
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy] [COFF] Ignore associative sections in executables (#151143) (PR #151336)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:

@cjacek What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/151336
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy] [COFF] Ignore associative sections in executables (#151143) (PR #151336)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-binary-utilities

Author: None (llvmbot)


Changes

Backport fcbbcffd2e6ea30097809ba0cd1e3b6003fa862f

Requested by: @mstorsjo

---
Full diff: https://github.com/llvm/llvm-project/pull/151336.diff


2 Files Affected:

- (modified) llvm/lib/ObjCopy/COFF/COFFReader.cpp (+1-1) 
- (added) llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test (+134) 


``diff
diff --git a/llvm/lib/ObjCopy/COFF/COFFReader.cpp 
b/llvm/lib/ObjCopy/COFF/COFFReader.cpp
index 62a71d41ded5f..9b55f76e58404 100644
--- a/llvm/lib/ObjCopy/COFF/COFFReader.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFReader.cpp
@@ -135,7 +135,7 @@ Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) 
const {
 // it is, find the target section unique id.
 const coff_aux_section_definition *SD = SymRef.getSectionDefinition();
 const coff_aux_weak_external *WE = SymRef.getWeakExternal();
-if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE && !Obj.IsPE) {
   int32_t Index = SD->getNumber(IsBigObj);
   if (Index <= 0 || static_cast(Index - 1) >= Sections.size())
 return createStringError(object_error::parse_failed,
diff --git a/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test 
b/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
new file mode 100644
index 0..12f14b5d58e1c
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/COFF/exe-bogus-assoc.test
@@ -0,0 +1,134 @@
+## Test that bogus associative section symbols in executables are ignored.
+##
+## The executable contains two (bogus) associative section symbols, both for
+## (parts of) the .rdata section; one pointing at the .debug_info section
+## (which will be stripped out) and one pointing at a nonexistent section.
+##
+## Check that stripping does succeed, and that it doesn't end up removing
+## the .rdata section.
+
+# RUN: yaml2obj %s -o %t.in.exe
+
+# RUN: llvm-strip --strip-debug %t.in.exe -o %t.out.exe
+# RUN: llvm-readobj --sections %t.out.exe | FileCheck %s
+
+# CHECK: Name: .rdata
+
+--- !COFF
+OptionalHeader:
+  AddressOfEntryPoint: 4096
+  ImageBase:   5368709120
+  SectionAlignment: 4096
+  FileAlignment:   512
+  MajorOperatingSystemVersion: 4
+  MinorOperatingSystemVersion: 0
+  MajorImageVersion: 0
+  MinorImageVersion: 0
+  MajorSubsystemVersion: 5
+  MinorSubsystemVersion: 2
+  Subsystem:   IMAGE_SUBSYSTEM_WINDOWS_CUI
+  DLLCharacteristics: [  ]
+  SizeOfStackReserve: 2097152
+  SizeOfStackCommit: 4096
+  SizeOfHeapReserve: 1048576
+  SizeOfHeapCommit: 4096
+header:
+  Machine: IMAGE_FILE_MACHINE_AMD64
+  Characteristics: [  ]
+sections:
+  - Name:.text
+Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, 
IMAGE_SCN_MEM_READ ]
+VirtualAddress:  4096
+VirtualSize: 48
+SectionData: 
E80600E80200C3C3C30F1F00
+SizeOfRawData:   512
+  - Name:.rdata
+Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+VirtualAddress:  8192
+VirtualSize: 4
+SectionData: ''
+SizeOfRawData:   512
+  - Name:.debug_info
+Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, 
IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ]
+VirtualAddress:  16384
+VirtualSize: 4
+SectionData: ''
+SizeOfRawData:   512
+symbols:
+  - Name:.text
+Value:   0
+SectionNumber:   1
+SimpleType:  IMAGE_SYM_TYPE_NULL
+ComplexType: IMAGE_SYM_DTYPE_NULL
+StorageClass:IMAGE_SYM_CLASS_STATIC
+SectionDefinition:
+  Length:  11
+  NumberOfRelocations: 2
+  NumberOfLinenumbers: 0
+  CheckSum:1703692295
+  Number:  1
+  - Name:'.text$func1'
+Value:   11
+SectionNumber:   1
+SimpleType:  IMAGE_SYM_TYPE_NULL
+ComplexType: IMAGE_SYM_DTYPE_NULL
+StorageClass:IMAGE_SYM_CLASS_STATIC
+SectionDefinition:
+  Length:  1
+  NumberOfRelocations: 0
+  NumberOfLinenumbers: 0
+  CheckSum:40735498
+  Number:  3
+  Selection:   IMAGE_COMDAT_SELECT_ANY
+  - Name:.rdata
+Value:   0
+SectionNumber:   2
+SimpleType:  IMAGE_SYM_TYPE_NULL
+ComplexType: IMAGE_SYM_DTYPE_NULL
+StorageClass:IMAGE_SYM_CLASS_STATIC
+SectionDefinition:
+  Length:  1
+  NumberOfRelocations: 0
+  NumberOfLinenumbers: 0
+  CheckSum:0
+  Number:  3
+  Selection:   IMAGE_COMDAT_SELECT_ASSOCIATIVE
+  - Name:'.text$func2'
+Value:   12
+SectionNumber:   1
+SimpleType:  IMAGE_SYM_TYPE_NULL
+ComplexType: IMAGE_SYM_DTYPE_NULL
+StorageClass:IMAGE_SYM_CLASS_STATIC
+SectionDefinition:
+  Length:  1
+  NumberOfRel

[llvm-branch-commits] [lldb] release/21.x: [lldb][AArch64][Linux] Show MTE store only setting in mte_ctrl (#145033) (PR #151111)

2025-07-30 Thread Omair Javaid via llvm-branch-commits

omjavaid wrote:

> @omjavaid @omjavaid What do you think about merging this PR to the release 
> branch?

This looks good to make it into LLVM 21. +1 from my side.

https://github.com/llvm/llvm-project/pull/151111
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor atan2f128 implementation to header-only in src/__support/math folder. (PR #151012)

2025-07-30 Thread Muhammad Bassiouni via llvm-branch-commits

https://github.com/bassiounix updated 
https://github.com/llvm/llvm-project/pull/151012

>From a4bd4ed9b3ce4b833cad7421816ff03fb7df9fab Mon Sep 17 00:00:00 2001
From: bassiounix 
Date: Mon, 28 Jul 2025 21:14:48 +0300
Subject: [PATCH 1/2] [libc][math] Refactor atan2f128 implementation to
 header-only in src/__support/math folder.

---
 libc/shared/math.h|   1 +
 libc/shared/math/atan2f128.h  |  29 +++
 libc/src/__support/math/CMakeLists.txt|  15 ++
 libc/src/__support/math/atan2f128.h   | 212 ++
 libc/src/math/generic/CMakeLists.txt  |  10 +-
 libc/src/math/generic/atan2f128.cpp   | 190 +---
 libc/test/shared/shared_math_test.cpp |   2 +
 .../llvm-project-overlay/libc/BUILD.bazel |  24 +-
 8 files changed, 284 insertions(+), 199 deletions(-)
 create mode 100644 libc/shared/math/atan2f128.h
 create mode 100644 libc/src/__support/math/atan2f128.h

diff --git a/libc/shared/math.h b/libc/shared/math.h
index 527bb8d6214ae..6cb583c08dedd 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -25,6 +25,7 @@
 #include "math/atan.h"
 #include "math/atan2.h"
 #include "math/atan2f.h"
+#include "math/atan2f128.h"
 #include "math/atanf.h"
 #include "math/atanf16.h"
 #include "math/erff.h"
diff --git a/libc/shared/math/atan2f128.h b/libc/shared/math/atan2f128.h
new file mode 100644
index 0..d7aee40c69527
--- /dev/null
+++ b/libc/shared/math/atan2f128.h
@@ -0,0 +1,29 @@
+//===-- Shared atan2f128 function ---*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F128_H
+#define LLVM_LIBC_SHARED_MATH_ATAN2F128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atan2f128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atan2f128;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+
+#endif // LLVM_LIBC_SHARED_MATH_ATAN2F128_H
diff --git a/libc/src/__support/math/CMakeLists.txt 
b/libc/src/__support/math/CMakeLists.txt
index c197b19ed29de..caafdc2cbf1d6 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -230,6 +230,21 @@ add_header_library(
 libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  atan2f128
+  HDRS
+atan2f128.h
+  DEPENDS
+.atan_utils
+libc.src.__support.integer_literals
+libc.src.__support.uint128
+libc.src.__support.FPUtil.dyadic_float
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.multiply_add
+libc.src.__support.FPUtil.nearest_integer
+libc.src.__support.macros.optimization
+)
+
 add_header_library(
   atanf
   HDRS
diff --git a/libc/src/__support/math/atan2f128.h 
b/libc/src/__support/math/atan2f128.h
new file mode 100644
index 0..89efaf1fd72a0
--- /dev/null
+++ b/libc/src/__support/math/atan2f128.h
@@ -0,0 +1,212 @@
+//===-- Implementation header for atan2f128 -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "atan_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/integer_literals.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/uint128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+// There are several range reduction steps we can take for atan2(y, x) as
+// follow:
+
+// * Range reduction 1: signness
+// atan2(y, x) will return a number between -PI and PI representing the angle
+// forming by the 0x axis and the vector (x, y) on the 0xy-plane.
+// In particular, we have that:
+//   atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant)
+//   = pi + atan( y/x )if x < 0 and y >= 0  (II-quadrant)
+//   = -pi + atan( y/x )   if x < 0 and y < 0   (III-quadrant)
+//   = atan( y/x ) if x >= 0 and y < 0  (IV-quadrant)
+// Since atan function is odd, we can use the formula:
+//   atan(-u) = -atan(u)
+// to adjust the a

[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor atan2f implementation to header-only in src/__support/math folder. (PR #150993)

2025-07-30 Thread Muhammad Bassiouni via llvm-branch-commits

https://github.com/bassiounix updated 
https://github.com/llvm/llvm-project/pull/150993

>From 37d0403d9fbb96d117cc8ce90cdee667ee9f86b2 Mon Sep 17 00:00:00 2001
From: bassiounix 
Date: Mon, 28 Jul 2025 19:35:03 +0300
Subject: [PATCH] [libc][math] Refactor atan2f implementation to header-only in
 src/__support/math folder.

---
 libc/shared/math.h|   1 +
 libc/shared/math/atan2f.h |  23 ++
 libc/src/__support/math/CMakeLists.txt|  17 +
 libc/src/__support/math/atan2f.h  | 351 ++
 .../generic => __support/math}/atan2f_float.h |  21 +-
 libc/src/math/generic/CMakeLists.txt  |  12 +-
 libc/src/math/generic/atan2f.cpp  | 328 +---
 libc/test/shared/CMakeLists.txt   |   1 +
 libc/test/shared/shared_math_test.cpp |   1 +
 .../llvm-project-overlay/libc/BUILD.bazel |  20 +-
 10 files changed, 427 insertions(+), 348 deletions(-)
 create mode 100644 libc/shared/math/atan2f.h
 create mode 100644 libc/src/__support/math/atan2f.h
 rename libc/src/{math/generic => __support/math}/atan2f_float.h (95%)

diff --git a/libc/shared/math.h b/libc/shared/math.h
index 0605d918eb2af..527bb8d6214ae 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -24,6 +24,7 @@
 #include "math/asinhf16.h"
 #include "math/atan.h"
 #include "math/atan2.h"
+#include "math/atan2f.h"
 #include "math/atanf.h"
 #include "math/atanf16.h"
 #include "math/erff.h"
diff --git a/libc/shared/math/atan2f.h b/libc/shared/math/atan2f.h
new file mode 100644
index 0..2de09d25e19f8
--- /dev/null
+++ b/libc/shared/math/atan2f.h
@@ -0,0 +1,23 @@
+//===-- Shared atan2f function --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F_H
+#define LLVM_LIBC_SHARED_MATH_ATAN2F_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atan2f.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atan2f;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ATAN2F_H
diff --git a/libc/src/__support/math/CMakeLists.txt 
b/libc/src/__support/math/CMakeLists.txt
index bbb07b62552f6..c197b19ed29de 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -213,6 +213,23 @@ add_header_library(
 libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  atan2f
+  HDRS
+atan2f_float.h
+atan2f.h
+  DEPENDS
+.inv_trigf_utils
+libc.src.__support.FPUtil.double_double
+libc.src.__support.FPUtil.fenv_impl
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.multiply_add
+libc.src.__support.FPUtil.nearest_integer
+libc.src.__support.FPUtil.polyeval
+libc.src.__support.macros.config
+libc.src.__support.macros.optimization
+)
+
 add_header_library(
   atanf
   HDRS
diff --git a/libc/src/__support/math/atan2f.h b/libc/src/__support/math/atan2f.h
new file mode 100644
index 0..e3b19329126f4
--- /dev/null
+++ b/libc/src/__support/math/atan2f.h
@@ -0,0 +1,351 @@
+//===-- Implementation header for atan2f *- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
+
+#include "inv_trigf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) &&   
\
+defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT)
+
+// We use float-float implementation to reduce size.
+#include "atan2f_float.h"
+
+#else
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace atan2f_internal {
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// Look up tables for accurate pass:
+
+// atan(i/16) with i = 0..16, generated by Sollya with:
+// > for i from 0 to 16 do {
+// a = round(atan(i/16), D, RN);
+// b = round(atan(i/16) - a, D, RN);
+// print("{", b, ",", a, "},");
+//   };
+static constexpr fputil::DoubleDouble ATAN_I[17] = {
+{0.0, 0.0},
+{-0x1.c934d86d23

[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor atan2f128 implementation to header-only in src/__support/math folder. (PR #151012)

2025-07-30 Thread Muhammad Bassiouni via llvm-branch-commits

https://github.com/bassiounix updated 
https://github.com/llvm/llvm-project/pull/151012

>From a4bd4ed9b3ce4b833cad7421816ff03fb7df9fab Mon Sep 17 00:00:00 2001
From: bassiounix 
Date: Mon, 28 Jul 2025 21:14:48 +0300
Subject: [PATCH 1/2] [libc][math] Refactor atan2f128 implementation to
 header-only in src/__support/math folder.

---
 libc/shared/math.h|   1 +
 libc/shared/math/atan2f128.h  |  29 +++
 libc/src/__support/math/CMakeLists.txt|  15 ++
 libc/src/__support/math/atan2f128.h   | 212 ++
 libc/src/math/generic/CMakeLists.txt  |  10 +-
 libc/src/math/generic/atan2f128.cpp   | 190 +---
 libc/test/shared/shared_math_test.cpp |   2 +
 .../llvm-project-overlay/libc/BUILD.bazel |  24 +-
 8 files changed, 284 insertions(+), 199 deletions(-)
 create mode 100644 libc/shared/math/atan2f128.h
 create mode 100644 libc/src/__support/math/atan2f128.h

diff --git a/libc/shared/math.h b/libc/shared/math.h
index 527bb8d6214ae..6cb583c08dedd 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -25,6 +25,7 @@
 #include "math/atan.h"
 #include "math/atan2.h"
 #include "math/atan2f.h"
+#include "math/atan2f128.h"
 #include "math/atanf.h"
 #include "math/atanf16.h"
 #include "math/erff.h"
diff --git a/libc/shared/math/atan2f128.h b/libc/shared/math/atan2f128.h
new file mode 100644
index 0..d7aee40c69527
--- /dev/null
+++ b/libc/shared/math/atan2f128.h
@@ -0,0 +1,29 @@
+//===-- Shared atan2f128 function ---*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F128_H
+#define LLVM_LIBC_SHARED_MATH_ATAN2F128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atan2f128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atan2f128;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+
+#endif // LLVM_LIBC_SHARED_MATH_ATAN2F128_H
diff --git a/libc/src/__support/math/CMakeLists.txt 
b/libc/src/__support/math/CMakeLists.txt
index c197b19ed29de..caafdc2cbf1d6 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -230,6 +230,21 @@ add_header_library(
 libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  atan2f128
+  HDRS
+atan2f128.h
+  DEPENDS
+.atan_utils
+libc.src.__support.integer_literals
+libc.src.__support.uint128
+libc.src.__support.FPUtil.dyadic_float
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.multiply_add
+libc.src.__support.FPUtil.nearest_integer
+libc.src.__support.macros.optimization
+)
+
 add_header_library(
   atanf
   HDRS
diff --git a/libc/src/__support/math/atan2f128.h 
b/libc/src/__support/math/atan2f128.h
new file mode 100644
index 0..89efaf1fd72a0
--- /dev/null
+++ b/libc/src/__support/math/atan2f128.h
@@ -0,0 +1,212 @@
+//===-- Implementation header for atan2f128 -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "atan_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/integer_literals.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/uint128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+// There are several range reduction steps we can take for atan2(y, x) as
+// follow:
+
+// * Range reduction 1: signness
+// atan2(y, x) will return a number between -PI and PI representing the angle
+// forming by the 0x axis and the vector (x, y) on the 0xy-plane.
+// In particular, we have that:
+//   atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant)
+//   = pi + atan( y/x )if x < 0 and y >= 0  (II-quadrant)
+//   = -pi + atan( y/x )   if x < 0 and y < 0   (III-quadrant)
+//   = atan( y/x ) if x >= 0 and y < 0  (IV-quadrant)
+// Since atan function is odd, we can use the formula:
+//   atan(-u) = -atan(u)
+// to adjust the a

[llvm-branch-commits] [libc] [llvm] [libc][math] Refactor atan2f implementation to header-only in src/__support/math folder. (PR #150993)

2025-07-30 Thread Muhammad Bassiouni via llvm-branch-commits

https://github.com/bassiounix updated 
https://github.com/llvm/llvm-project/pull/150993

>From 37d0403d9fbb96d117cc8ce90cdee667ee9f86b2 Mon Sep 17 00:00:00 2001
From: bassiounix 
Date: Mon, 28 Jul 2025 19:35:03 +0300
Subject: [PATCH] [libc][math] Refactor atan2f implementation to header-only in
 src/__support/math folder.

---
 libc/shared/math.h|   1 +
 libc/shared/math/atan2f.h |  23 ++
 libc/src/__support/math/CMakeLists.txt|  17 +
 libc/src/__support/math/atan2f.h  | 351 ++
 .../generic => __support/math}/atan2f_float.h |  21 +-
 libc/src/math/generic/CMakeLists.txt  |  12 +-
 libc/src/math/generic/atan2f.cpp  | 328 +---
 libc/test/shared/CMakeLists.txt   |   1 +
 libc/test/shared/shared_math_test.cpp |   1 +
 .../llvm-project-overlay/libc/BUILD.bazel |  20 +-
 10 files changed, 427 insertions(+), 348 deletions(-)
 create mode 100644 libc/shared/math/atan2f.h
 create mode 100644 libc/src/__support/math/atan2f.h
 rename libc/src/{math/generic => __support/math}/atan2f_float.h (95%)

diff --git a/libc/shared/math.h b/libc/shared/math.h
index 0605d918eb2af..527bb8d6214ae 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -24,6 +24,7 @@
 #include "math/asinhf16.h"
 #include "math/atan.h"
 #include "math/atan2.h"
+#include "math/atan2f.h"
 #include "math/atanf.h"
 #include "math/atanf16.h"
 #include "math/erff.h"
diff --git a/libc/shared/math/atan2f.h b/libc/shared/math/atan2f.h
new file mode 100644
index 0..2de09d25e19f8
--- /dev/null
+++ b/libc/shared/math/atan2f.h
@@ -0,0 +1,23 @@
+//===-- Shared atan2f function --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F_H
+#define LLVM_LIBC_SHARED_MATH_ATAN2F_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/atan2f.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::atan2f;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_ATAN2F_H
diff --git a/libc/src/__support/math/CMakeLists.txt 
b/libc/src/__support/math/CMakeLists.txt
index bbb07b62552f6..c197b19ed29de 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -213,6 +213,23 @@ add_header_library(
 libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  atan2f
+  HDRS
+atan2f_float.h
+atan2f.h
+  DEPENDS
+.inv_trigf_utils
+libc.src.__support.FPUtil.double_double
+libc.src.__support.FPUtil.fenv_impl
+libc.src.__support.FPUtil.fp_bits
+libc.src.__support.FPUtil.multiply_add
+libc.src.__support.FPUtil.nearest_integer
+libc.src.__support.FPUtil.polyeval
+libc.src.__support.macros.config
+libc.src.__support.macros.optimization
+)
+
 add_header_library(
   atanf
   HDRS
diff --git a/libc/src/__support/math/atan2f.h b/libc/src/__support/math/atan2f.h
new file mode 100644
index 0..e3b19329126f4
--- /dev/null
+++ b/libc/src/__support/math/atan2f.h
@@ -0,0 +1,351 @@
+//===-- Implementation header for atan2f *- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H
+
+#include "inv_trigf_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) &&   
\
+defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT)
+
+// We use float-float implementation to reduce size.
+#include "atan2f_float.h"
+
+#else
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace atan2f_internal {
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// Look up tables for accurate pass:
+
+// atan(i/16) with i = 0..16, generated by Sollya with:
+// > for i from 0 to 16 do {
+// a = round(atan(i/16), D, RN);
+// b = round(atan(i/16) - a, D, RN);
+// print("{", b, ",", a, "},");
+//   };
+static constexpr fputil::DoubleDouble ATAN_I[17] = {
+{0.0, 0.0},
+{-0x1.c934d86d23

[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (PR #151415)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-mc

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)


Changes



---

Patch is 122.29 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/151415.diff


28 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+4) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+138) 
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+24) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+4) 
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+23-1) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+2) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+51) 
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+16) 
- (added) llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll (+28) 
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll (+539) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s (+145) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s (+145) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s (+25) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt (+167-3) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt (+64) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt (+64) 


``diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index ec00fadf3039a..172ac467f7cad 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -702,6 +702,10 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc", 
"gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 2595442ba7f9e..1c67fc3879bff 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -398,6 +398,144 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
   out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
 }
 
+// CHECK-LABEL: @test_cvt_pk_bf8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[OUT_ADDR]] to ptr
+// CHECK-NEXT:[[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[A_ADDR]] to ptr
+// CHECK-NEXT:store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], 
align 8
+// CHECK-NEXT:store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 
4
+// CHECK-NEXT:[[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x 
half> [[TMP0]])
+// CHECK-NEXT:[[TMP2:%.*]] = load ptr addrspace(1), ptr 
[[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT:ret void
+//
+void test_cvt_pk_bf8_f16(global short* out, half2 a)
+{
+  *out = __builtin_amdgcn_cvt_pk_bf8_f16(a);
+}
+
+// CHECK-LABEL: @test_cvt_pk_fp8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] 

[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (PR #151415)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: Stanislav Mekhanoshin (rampitec)


Changes



---

Patch is 122.29 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/151415.diff


28 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+4) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+138) 
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+24) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+4) 
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+23-1) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+2) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+51) 
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+16) 
- (added) llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll (+28) 
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll (+539) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s (+145) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s (+145) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s (+25) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt (+167-3) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt (+64) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt (+64) 


``diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index ec00fadf3039a..172ac467f7cad 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -702,6 +702,10 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc", 
"gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 2595442ba7f9e..1c67fc3879bff 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -398,6 +398,144 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
   out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
 }
 
+// CHECK-LABEL: @test_cvt_pk_bf8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[OUT_ADDR]] to ptr
+// CHECK-NEXT:[[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[A_ADDR]] to ptr
+// CHECK-NEXT:store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], 
align 8
+// CHECK-NEXT:store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 
4
+// CHECK-NEXT:[[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x 
half> [[TMP0]])
+// CHECK-NEXT:[[TMP2:%.*]] = load ptr addrspace(1), ptr 
[[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT:ret void
+//
+void test_cvt_pk_bf8_f16(global short* out, half2 a)
+{
+  *out = __builtin_amdgcn_cvt_pk_bf8_f16(a);
+}
+
+// CHECK-LABEL: @test_cvt_pk_fp8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5

[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (PR #151415)

2025-07-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Stanislav Mekhanoshin (rampitec)


Changes



---

Patch is 122.29 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/151415.diff


28 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+4) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+138) 
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+24) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+4) 
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+23-1) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1) 
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+2) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+3) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+51) 
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+16) 
- (added) llvm/test/CodeGen/AMDGPU/code-size-estimate-gfx1250.ll (+28) 
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll (+539) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s (+145) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s (+145) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16-fake16.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp16.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8-fake16.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_dpp8.s (+64) 
- (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_err.s (+25) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt (+167-3) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp16.txt (+64) 
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_dpp8.txt (+64) 


``diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def 
b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index ec00fadf3039a..172ac467f7cad 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -702,6 +702,10 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc", 
"gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc", 
"gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 2595442ba7f9e..1c67fc3879bff 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -398,6 +398,144 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
   out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
 }
 
+// CHECK-LABEL: @test_cvt_pk_bf8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[OUT_ADDR]] to ptr
+// CHECK-NEXT:[[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[A_ADDR]] to ptr
+// CHECK-NEXT:store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], 
align 8
+// CHECK-NEXT:store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:[[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 
4
+// CHECK-NEXT:[[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x 
half> [[TMP0]])
+// CHECK-NEXT:[[TMP2:%.*]] = load ptr addrspace(1), ptr 
[[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT:ret void
+//
+void test_cvt_pk_bf8_f16(global short* out, half2 a)
+{
+  *out = __builtin_amdgcn_cvt_pk_bf8_f16(a);
+}
+
+// CHECK-LABEL: @test_cvt_pk_fp8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
+// CHECK-NEXT:[[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:[[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 

[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (PR #151415)

2025-07-30 Thread Stanislav Mekhanoshin via llvm-branch-commits

https://github.com/rampitec ready_for_review 
https://github.com/llvm/llvm-project/pull/151415
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [AMDGPU] Add v_cvt_sr|pk_bf8|fp8_f16 gfx1250 instructions (PR #151415)

2025-07-30 Thread Stanislav Mekhanoshin via llvm-branch-commits

rampitec wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151415?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#151415** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151415?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151415?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#151389** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151389?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#151385** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151385?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#151379** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/151379?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn 
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/151415
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NFC] test/lit.cfg.py formatting (PR #151218)

2025-07-30 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/151218

>From ed3dd6b28f8182e078a9c4ed78a6293bfabfc92f Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 29 Jul 2025 13:01:10 -0700
Subject: [PATCH] [NFC] test/lit.cfg.py formatting

---
 llvm/test/lit.cfg.py | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 143cc3817bd08..1d190fd20e573 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -451,7 +451,7 @@ def version_int(ver):
 "%llvmdylib",
 "{}/libLLVM{}.{}".format(
 config.llvm_shlib_dir, config.llvm_shlib_ext, 
config.llvm_dylib_version
-)
+),
 )
 )
 
@@ -582,6 +582,7 @@ def have_ld64_plugin_support():
 if have_ld64_plugin_support():
 config.available_features.add("ld64_plugin")
 
+
 def host_unwind_supports_jit():
 # Do we expect the host machine to support JIT registration of clang's
 # default unwind info format for the host (e.g. eh-frames, compact-unwind,
@@ -589,7 +590,7 @@ def host_unwind_supports_jit():
 
 # Linux and the BSDs use DWARF eh-frames and all known unwinders support
 # register_frame at minimum.
-if platform.system() in [ "Linux", "FreeBSD", "NetBSD" ]:
+if platform.system() in ["Linux", "FreeBSD", "NetBSD"]:
 return True
 
 # Windows does not support frame info without the ORC runtime.
@@ -602,10 +603,7 @@ def host_unwind_supports_jit():
 # macOS 14.0.
 if platform.system() == "Darwin":
 
-assert (
-"arm64" in config.host_triple
-or "x86_64" in config.host_triple
-)
+assert "arm64" in config.host_triple or "x86_64" in config.host_triple
 
 if "x86_64" in config.host_triple:
 return True
@@ -627,6 +625,7 @@ def host_unwind_supports_jit():
 
 return False
 
+
 if host_unwind_supports_jit():
 config.available_features.add("host-unwind-supports-jit")
 

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Ignore FileCheck when profcheck is enabled (PR #151214)

2025-07-30 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/151214

>From 218c4bfd978b253c112f3910b4e5f768f2df49b0 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 29 Jul 2025 12:55:59 -0700
Subject: [PATCH] fixes

---
 llvm/test/lit.cfg.py | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 1d190fd20e573..43c7cf677a913 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -18,7 +18,17 @@
 config.name = "LLVM"
 
 # testFormat: The test format to use to interpret tests.
-config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell)
+extra_substitutions = extra_substitutions = (
+[
+(r"\| not FileCheck .*", "| tee /dev/null"),
+(r"\| FileCheck .*", "| tee /dev/null"),
+]
+if config.enable_profcheck
+else []
+)
+config.test_format = lit.formats.ShTest(
+not llvm_config.use_lit_shell, extra_substitutions
+)
 
 # suffixes: A list of file extensions to treat as test files. This is overriden
 # by individual lit.local.cfg files in the test subdirectories.
@@ -278,6 +288,7 @@ def get_asan_rtlib():
 ]
 )
 
+
 # Find (major, minor) version of ptxas
 def ptxas_version(ptxas):
 ptxas_cmd = subprocess.Popen([ptxas, "--version"], stdout=subprocess.PIPE)
@@ -602,7 +613,6 @@ def host_unwind_supports_jit():
 # compact-unwind only, and JIT'd registration is not available before
 # macOS 14.0.
 if platform.system() == "Darwin":
-
 assert "arm64" in config.host_triple or "x86_64" in config.host_triple
 
 if "x86_64" in config.host_triple:

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


  1   2   >