date:20250510

[llvm-branch-commits] [Github][CI] Make the new premerge system fail jobs on errors (PR #139359)

2025-05-10 Thread Mehdi Amini via llvm-branch-commits


https://github.com/joker-eph approved this pull request.


https://github.com/llvm/llvm-project/pull/139359
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [Github][CI] Make the new premerge system fail jobs on errors (PR #139359)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


https://github.com/tstellar approved this pull request.

LGTM.

https://github.com/llvm/llvm-project/pull/139359
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: Fix crash lowering stack guard on OpenBSD/aarch64. (#125416) (PR #136458)

2025-05-10 Thread via llvm-branch-commits


3405691582 wrote:

> @brad0 (or anyone else). If you would like to add a note about this fix in 
> the release notes (completely optional). Please reply to this comment with a 
> one or two sentence description of the fix. When you are done, please add the 
> release:note label to this PR.

Fixed compiler crash on OpenBSD/aarch64 when using stack protection by ensuring 
platform-specific guard variables are consistently referenced.


https://github.com/llvm/llvm-project/pull/136458
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) (PR #139388)

2025-05-10 Thread via llvm-branch-commits


https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/139388

Backport b3d2dc321c5c78b7204696afe07fe6ef3375acfd

Requested by: @tstellar

>From ff4132ec328ed80be247856939dbf7345106cc55 Mon Sep 17 00:00:00 2001
From: Paul Kirth 
Date: Fri, 18 Apr 2025 09:12:52 -0700
Subject: [PATCH] [RISCV] Fix assertion failure when using
 -fstack-clash-protection (#135248)

We can't assume MBBI is still pointing at MBB if we've already expanded
a probe. We need to re-query the MBB from MBBI. Fixes #135206

Co-authored-by: Craig Topper 
(cherry picked from commit b3d2dc321c5c78b7204696afe07fe6ef3375acfd)
---
 llvm/lib/Target/RISCV/RISCVFrameLowering.cpp |  8 +-
 llvm/test/CodeGen/RISCV/pr135206.ll  | 84 
 2 files changed, 89 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/pr135206.ll

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp 
b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index bb2e5781c34db..6f4c1e16190f4 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -2135,11 +2135,13 @@ TargetStackID::Value 
RISCVFrameLowering::getStackIDForScalableVectors() const {
 }
 
 // Synthesize the probe loop.
-static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL,
+static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL,
  Register TargetReg, bool IsRVV) {
   assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP");
 
+  MachineBasicBlock &MBB = *MBBI->getParent();
+  MachineFunction &MF = *MBB.getParent();
+
   auto &Subtarget = MF.getSubtarget();
   const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
   bool IsRV64 = Subtarget.is64Bit();
@@ -2228,7 +2230,7 @@ void RISCVFrameLowering::inlineStackProbe(MachineFunction 
&MF,
   MachineBasicBlock::iterator MBBI = MI->getIterator();
   DebugLoc DL = MBB.findDebugLoc(MBBI);
   Register TargetReg = MI->getOperand(1).getReg();
-  emitStackProbeInline(MF, MBB, MBBI, DL, TargetReg,
+  emitStackProbeInline(MBBI, DL, TargetReg,
(MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV));
   MBBI->eraseFromParent();
 }
diff --git a/llvm/test/CodeGen/RISCV/pr135206.ll 
b/llvm/test/CodeGen/RISCV/pr135206.ll
new file mode 100644
index 0..196e78d8ed8b9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr135206.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -mtriple riscv64 < %s -o - | FileCheck %s
+
+%"buff" = type { [4096 x i64] }
+
+declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
+declare void @bar()
+
+define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
+; CHECK-LABEL: foo:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:addi sp, sp, -2032
+; CHECK-NEXT:sd ra, 2024(sp) # 8-byte Folded Spill
+; CHECK-NEXT:sd s0, 2016(sp) # 8-byte Folded Spill
+; CHECK-NEXT:sd s1, 2008(sp) # 8-byte Folded Spill
+; CHECK-NEXT:sd s2, 2000(sp) # 8-byte Folded Spill
+; CHECK-NEXT:sd s3, 1992(sp) # 8-byte Folded Spill
+; CHECK-NEXT:lui a0, 7
+; CHECK-NEXT:sub t1, sp, a0
+; CHECK-NEXT:lui t2, 1
+; CHECK-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:sub sp, sp, t2
+; CHECK-NEXT:sd zero, 0(sp)
+; CHECK-NEXT:bne sp, t1, .LBB0_1
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:addi sp, sp, -2048
+; CHECK-NEXT:addi sp, sp, -96
+; CHECK-NEXT:csrr t1, vlenb
+; CHECK-NEXT:lui t2, 1
+; CHECK-NEXT:  .LBB0_3: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:sub sp, sp, t2
+; CHECK-NEXT:sd zero, 0(sp)
+; CHECK-NEXT:sub t1, t1, t2
+; CHECK-NEXT:bge t1, t2, .LBB0_3
+; CHECK-NEXT:  # %bb.4:
+; CHECK-NEXT:sub sp, sp, t1
+; CHECK-NEXT:li a0, 86
+; CHECK-NEXT:addi s0, sp, 48
+; CHECK-NEXT:addi s1, sp, 32
+; CHECK-NEXT:addi s2, sp, 16
+; CHECK-NEXT:lui a1, 353637
+; CHECK-NEXT:vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:vmv.v.x v8, a0
+; CHECK-NEXT:lui a0, 8
+; CHECK-NEXT:addiw a0, a0, 32
+; CHECK-NEXT:add a0, sp, a0
+; CHECK-NEXT:vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-NEXT:addiw a0, a1, 1622
+; CHECK-NEXT:vse8.v v8, (s0)
+; CHECK-NEXT:vse8.v v8, (s1)
+; CHECK-NEXT:vse8.v v8, (s2)
+; CHECK-NEXT:slli a1, a0, 32
+; CHECK-NEXT:add s3, a0, a1
+; CHECK-NEXT:sd s3, 64(sp)
+; CHECK-NEXT:call bar
+; CHECK-NEXT:lui a0, 8
+; CHECK-NEXT:addiw a0, a0, 32
+; CHECK-NEXT:add a0, sp, a0
+; CHECK-NEXT:vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:vse8.v v8, (s0)
+; CHECK-NEXT:vse8.v v8, (s1)
+; CHECK-NEXT:vse8.v v8, (s2)
+; CHECK-NEXT:sd s3, 64(sp)
+; CHECK-NEXT:li a0, 0
+; CHECK-NEX

[llvm-branch-commits] [llvm] release/20.x: [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) (PR #139388)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-risc-v

Author: None (llvmbot)


Changes

Backport b3d2dc321c5c78b7204696afe07fe6ef3375acfd

Requested by: @tstellar

---
Full diff: https://github.com/llvm/llvm-project/pull/139388.diff


2 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVFrameLowering.cpp (+5-3) 
- (added) llvm/test/CodeGen/RISCV/pr135206.ll (+84) 


```diff
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp 
b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index bb2e5781c34db..6f4c1e16190f4 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -2135,11 +2135,13 @@ TargetStackID::Value 
RISCVFrameLowering::getStackIDForScalableVectors() const {
 }
 
 // Synthesize the probe loop.
-static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL,
+static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL,
  Register TargetReg, bool IsRVV) {
   assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP");
 
+  MachineBasicBlock &MBB = *MBBI->getParent();
+  MachineFunction &MF = *MBB.getParent();
+
   auto &Subtarget = MF.getSubtarget();
   const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
   bool IsRV64 = Subtarget.is64Bit();
@@ -2228,7 +2230,7 @@ void RISCVFrameLowering::inlineStackProbe(MachineFunction 
&MF,
   MachineBasicBlock::iterator MBBI = MI->getIterator();
   DebugLoc DL = MBB.findDebugLoc(MBBI);
   Register TargetReg = MI->getOperand(1).getReg();
-  emitStackProbeInline(MF, MBB, MBBI, DL, TargetReg,
+  emitStackProbeInline(MBBI, DL, TargetReg,
(MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV));
   MBBI->eraseFromParent();
 }
diff --git a/llvm/test/CodeGen/RISCV/pr135206.ll 
b/llvm/test/CodeGen/RISCV/pr135206.ll
new file mode 100644
index 0..196e78d8ed8b9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr135206.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -mtriple riscv64 < %s -o - | FileCheck %s
+
+%"buff" = type { [4096 x i64] }
+
+declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
+declare void @bar()
+
+define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" {
+; CHECK-LABEL: foo:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:addi sp, sp, -2032
+; CHECK-NEXT:sd ra, 2024(sp) # 8-byte Folded Spill
+; CHECK-NEXT:sd s0, 2016(sp) # 8-byte Folded Spill
+; CHECK-NEXT:sd s1, 2008(sp) # 8-byte Folded Spill
+; CHECK-NEXT:sd s2, 2000(sp) # 8-byte Folded Spill
+; CHECK-NEXT:sd s3, 1992(sp) # 8-byte Folded Spill
+; CHECK-NEXT:lui a0, 7
+; CHECK-NEXT:sub t1, sp, a0
+; CHECK-NEXT:lui t2, 1
+; CHECK-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:sub sp, sp, t2
+; CHECK-NEXT:sd zero, 0(sp)
+; CHECK-NEXT:bne sp, t1, .LBB0_1
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:addi sp, sp, -2048
+; CHECK-NEXT:addi sp, sp, -96
+; CHECK-NEXT:csrr t1, vlenb
+; CHECK-NEXT:lui t2, 1
+; CHECK-NEXT:  .LBB0_3: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:sub sp, sp, t2
+; CHECK-NEXT:sd zero, 0(sp)
+; CHECK-NEXT:sub t1, t1, t2
+; CHECK-NEXT:bge t1, t2, .LBB0_3
+; CHECK-NEXT:  # %bb.4:
+; CHECK-NEXT:sub sp, sp, t1
+; CHECK-NEXT:li a0, 86
+; CHECK-NEXT:addi s0, sp, 48
+; CHECK-NEXT:addi s1, sp, 32
+; CHECK-NEXT:addi s2, sp, 16
+; CHECK-NEXT:lui a1, 353637
+; CHECK-NEXT:vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:vmv.v.x v8, a0
+; CHECK-NEXT:lui a0, 8
+; CHECK-NEXT:addiw a0, a0, 32
+; CHECK-NEXT:add a0, sp, a0
+; CHECK-NEXT:vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; CHECK-NEXT:addiw a0, a1, 1622
+; CHECK-NEXT:vse8.v v8, (s0)
+; CHECK-NEXT:vse8.v v8, (s1)
+; CHECK-NEXT:vse8.v v8, (s2)
+; CHECK-NEXT:slli a1, a0, 32
+; CHECK-NEXT:add s3, a0, a1
+; CHECK-NEXT:sd s3, 64(sp)
+; CHECK-NEXT:call bar
+; CHECK-NEXT:lui a0, 8
+; CHECK-NEXT:addiw a0, a0, 32
+; CHECK-NEXT:add a0, sp, a0
+; CHECK-NEXT:vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
+; CHECK-NEXT:vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:vse8.v v8, (s0)
+; CHECK-NEXT:vse8.v v8, (s1)
+; CHECK-NEXT:vse8.v v8, (s2)
+; CHECK-NEXT:sd s3, 64(sp)
+; CHECK-NEXT:li a0, 0
+; CHECK-NEXT:csrr a1, vlenb
+; CHECK-NEXT:add sp, sp, a1
+; CHECK-NEXT:lui a1, 8
+; CHECK-NEXT:addiw a1, a1, -1952
+; CHECK-NEXT:add sp, sp, a1
+; CHECK-NEXT:ld ra, 2024(sp) # 8-byte Folded Reload
+; CHECK-NEXT:ld s0, 2016(sp) # 8-byte Folded Reload
+; CHECK-NEXT:ld s1, 2008(sp) # 8-byte Folded Reload
+; CHECK-NEXT:ld s2, 2000(sp) # 8-byte Folded Reload
+; CHECK-NEXT:ld s3, 1992(sp) # 8-byte Folded Reload
+; CHECK-NEXT:addi sp,

[llvm-branch-commits] [llvm] release/20.x: [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) (PR #139388)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:

@preames What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/139388
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) (PR #139388)

2025-05-10 Thread via llvm-branch-commits


eyestoreoye wrote:

Very clean code with good test coverage. This is a great example of best 
practices.

https://github.com/llvm/llvm-project/pull/139388
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)

2025-05-10 Thread Anutosh Bhat via llvm-branch-commits


https://github.com/anutosh491 updated 
https://github.com/llvm/llvm-project/pull/137620



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: AMDGPU: Stop emitting an error on illegal addrspacecasts (#127487) (PR #127496)

2025-05-10 Thread Nikita Popov via llvm-branch-commits


nikic wrote:

Closing this as https://github.com/llvm/llvm-project/pull/127751 has landed.

https://github.com/llvm/llvm-project/pull/127496
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: AMDGPU: Stop emitting an error on illegal addrspacecasts (#127487) (PR #127496)

2025-05-10 Thread Nikita Popov via llvm-branch-commits


https://github.com/nikic closed https://github.com/llvm/llvm-project/pull/127496
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] release/20.x: [clang-format] Fix a crash on formatting missing r_pare… (PR #139345)

2025-05-10 Thread Nikita Popov via llvm-branch-commits


https://github.com/nikic milestoned 
https://github.com/llvm/llvm-project/pull/139345
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)

2025-05-10 Thread via llvm-branch-commits


https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/139389

Backport 801b519dfd01e21da0be17aa8f8dc2ceb0eb9e77

Requested by: @tstellar

>From 3632a99ac092c604b374d1f02681465e48d5 Mon Sep 17 00:00:00 2001
From: mojyack <66899529+mojy...@users.noreply.github.com>
Date: Fri, 11 Apr 2025 06:23:26 +0900
Subject: [PATCH] [sanitizer_common] Fix build on ppc64+musl (#120036)

In powerpc64-unknown-linux-musl, signal.h does not include asm/ptrace.h,
which causes "member access into incomplete type 'struct pt_regs'"
errors. Include the header explicitly to fix this.

Also in sanitizer_linux_libcdep.cpp, there is a usage of TlsPreTcbSize
which is not defined in such a platform. Guard the branch with macro.

(cherry picked from commit 801b519dfd01e21da0be17aa8f8dc2ceb0eb9e77)
---
 .../lib/sanitizer_common/sanitizer_linux.cpp|  4 
 .../sanitizer_common/sanitizer_linux_libcdep.cpp| 13 +++--
 .../sanitizer_platform_limits_posix.cpp |  2 +-
 .../sanitizer_stoptheworld_linux_libcdep.cpp|  3 ++-
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 7aa48d29d2d53..a4d526b4466c3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -86,6 +86,10 @@
 #include 
 #  endif
 
+#  if SANITIZER_LINUX && defined(__powerpc64__)
+#include 
+#  endif
+
 #  if SANITIZER_FREEBSD
 #include 
 #include 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
index e11eff13cd326..331e1c7d8d152 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
@@ -619,21 +619,22 @@ static void GetTls(uptr *addr, uptr *size) {
   *addr = tp - RoundUpTo(*size, align);
   *size = tp - *addr + ThreadDescriptorSize();
 #  else
-  if (SANITIZER_GLIBC)
-*size += 1664;
-  else if (SANITIZER_FREEBSD)
-*size += 128;  // RTLD_STATIC_TLS_EXTRA
-#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
+#if SANITIZER_GLIBC
+  *size += 1664;
+#elif SANITIZER_FREEBSD
+  *size += 128;  // RTLD_STATIC_TLS_EXTRA
+#  if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
   const uptr pre_tcb_size = TlsPreTcbSize();
   *addr -= pre_tcb_size;
   *size += pre_tcb_size;
-#else
+#  else
   // arm and aarch64 reserve two words at TP, so this underestimates the range.
   // However, this is sufficient for the purpose of finding the pointers to
   // thread-specific data keys.
   const uptr tcb_size = ThreadDescriptorSize();
   *addr -= tcb_size;
   *size += tcb_size;
+#  endif
 #endif
 #  endif
 #elif SANITIZER_NETBSD
diff --git 
a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
index a5311d266b0c4..ec5f2edab6a64 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
@@ -96,7 +96,7 @@
 # include 
 #if defined(__mips64) || defined(__aarch64__) || defined(__arm__) ||   
\
 defined(__hexagon__) || defined(__loongarch__) || SANITIZER_RISCV64 || 
\
-defined(__sparc__)
+defined(__sparc__) || defined(__powerpc64__)
 #  include 
 #  ifdef __arm__
 typedef struct user_fpregs elf_fpregset_t;
diff --git 
a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
index 945da99d41f4e..58d17d90c343a 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
@@ -31,7 +31,8 @@
 #include  // for pid_t
 #include  // for iovec
 #include  // for NT_PRSTATUS
-#if (defined(__aarch64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) && \
+#if (defined(__aarch64__) || defined(__powerpc64__) || \
+ SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) &&\
  !SANITIZER_ANDROID
 // GLIBC 2.20+ sys/user does not include asm/ptrace.h
 # include 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: None (llvmbot)


Changes

Backport 801b519dfd01e21da0be17aa8f8dc2ceb0eb9e77

Requested by: @tstellar

---
Full diff: https://github.com/llvm/llvm-project/pull/139389.diff


4 Files Affected:

- (modified) compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp (+4) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp 
(+7-6) 
- (modified) 
compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp (+1-1) 
- (modified) 
compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp 
(+2-1) 


```diff
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 7aa48d29d2d53..a4d526b4466c3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -86,6 +86,10 @@
 #include 
 #  endif
 
+#  if SANITIZER_LINUX && defined(__powerpc64__)
+#include 
+#  endif
+
 #  if SANITIZER_FREEBSD
 #include 
 #include 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
index e11eff13cd326..331e1c7d8d152 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp
@@ -619,21 +619,22 @@ static void GetTls(uptr *addr, uptr *size) {
   *addr = tp - RoundUpTo(*size, align);
   *size = tp - *addr + ThreadDescriptorSize();
 #  else
-  if (SANITIZER_GLIBC)
-*size += 1664;
-  else if (SANITIZER_FREEBSD)
-*size += 128;  // RTLD_STATIC_TLS_EXTRA
-#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
+#if SANITIZER_GLIBC
+  *size += 1664;
+#elif SANITIZER_FREEBSD
+  *size += 128;  // RTLD_STATIC_TLS_EXTRA
+#  if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
   const uptr pre_tcb_size = TlsPreTcbSize();
   *addr -= pre_tcb_size;
   *size += pre_tcb_size;
-#else
+#  else
   // arm and aarch64 reserve two words at TP, so this underestimates the range.
   // However, this is sufficient for the purpose of finding the pointers to
   // thread-specific data keys.
   const uptr tcb_size = ThreadDescriptorSize();
   *addr -= tcb_size;
   *size += tcb_size;
+#  endif
 #endif
 #  endif
 #elif SANITIZER_NETBSD
diff --git 
a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
index a5311d266b0c4..ec5f2edab6a64 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp
@@ -96,7 +96,7 @@
 # include 
 #if defined(__mips64) || defined(__aarch64__) || defined(__arm__) ||   
\
 defined(__hexagon__) || defined(__loongarch__) || SANITIZER_RISCV64 || 
\
-defined(__sparc__)
+defined(__sparc__) || defined(__powerpc64__)
 #  include 
 #  ifdef __arm__
 typedef struct user_fpregs elf_fpregset_t;
diff --git 
a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
index 945da99d41f4e..58d17d90c343a 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp
@@ -31,7 +31,8 @@
 #include  // for pid_t
 #include  // for iovec
 #include  // for NT_PRSTATUS
-#if (defined(__aarch64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) && \
+#if (defined(__aarch64__) || defined(__powerpc64__) || \
+ SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) &&\
  !SANITIZER_ANDROID
 // GLIBC 2.20+ sys/user does not include asm/ptrace.h
 # include 

```




https://github.com/llvm/llvm-project/pull/139389
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:

@vitalybuka What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/139389
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)

2025-05-10 Thread via llvm-branch-commits


https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/139389
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)

2025-05-10 Thread via llvm-branch-commits


github-actions[bot] wrote:

⚠️ We detected that you are using a GitHub private e-mail address to contribute 
to the repo. Please turn off [Keep my email addresses 
private](https://github.com/settings/emails) setting in your account. See 
[LLVM 
Discourse](https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it)
 for more information.

https://github.com/llvm/llvm-project/pull/139389
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) (PR #139388)

2025-05-10 Thread via llvm-branch-commits


https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/139388
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)

2025-05-10 Thread Anutosh Bhat via llvm-branch-commits


anutosh491 wrote:

Hey @tstellar,

The CI is green. Should be good to go !

https://github.com/llvm/llvm-project/pull/137620
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [lld] 0019b7d - [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


Author: Anutosh Bhat
Date: 2025-05-10T11:12:53-07:00
New Revision: 0019b7d0ae0bcc65af065542fcfb48ea0eb55d38

URL: 
https://github.com/llvm/llvm-project/commit/0019b7d0ae0bcc65af065542fcfb48ea0eb55d38
DIFF: 
https://github.com/llvm/llvm-project/commit/0019b7d0ae0bcc65af065542fcfb48ea0eb55d38.diff

LOG: [wasm-ld] Refactor WasmSym from static globals to per-link context 
(#134970)

Towards

This change moves WasmSym from a static global struct to an instance
owned by Ctx, allowing it to be reset cleanly between linker runs. This
enables safe support for multiple invocations of wasm-ld within the same
process

Changes done

- Converted WasmSym from a static struct to a regular struct with
instance members.

- Added a std::unique_ptr<WasmSym> wasmSym field inside Ctx.

- Reset wasmSym in Ctx::reset() to clear state between links.

- Replaced all WasmSym:: references with ctx.wasmSym->.

- Removed global symbol definitions from Symbols.cpp that are no longer
needed.

Clearing wasmSym in ctx.reset() ensures a clean slate for each link
invocation, preventing symbol leakage across runs—critical when using
wasm-ld/lld as a reentrant library where global state can cause subtle,
hard-to-debug errors.

-

Co-authored-by: Vassil Vassilev 
(cherry picked from commit 9cbbb74d370c09e13b8412f21dccb7d2c4afc6a4)

Added: 


Modified: 
lld/wasm/Config.h
lld/wasm/Driver.cpp
lld/wasm/InputChunks.cpp
lld/wasm/MarkLive.cpp
lld/wasm/OutputSections.cpp
lld/wasm/Symbols.cpp
lld/wasm/Symbols.h
lld/wasm/SyntheticSections.cpp
lld/wasm/Writer.cpp

Removed: 




diff  --git a/lld/wasm/Config.h b/lld/wasm/Config.h
index 1fa6c42d9cd86..527edc11c48e3 100644
--- a/lld/wasm/Config.h
+++ b/lld/wasm/Config.h
@@ -32,6 +32,11 @@ class InputTable;
 class InputGlobal;
 class InputFunction;
 class Symbol;
+class DefinedData;
+class GlobalSymbol;
+class DefinedFunction;
+class UndefinedGlobal;
+class TableSymbol;
 
 // For --unresolved-symbols.
 enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic };
@@ -139,6 +144,107 @@ struct Ctx {
   llvm::SmallVector syntheticGlobals;
   llvm::SmallVector syntheticTables;
 
+  // linker-generated symbols
+  struct WasmSym {
+// __global_base
+// Symbol marking the start of the global section.
+DefinedData *globalBase;
+
+// __stack_pointer/__stack_low/__stack_high
+// Global that holds current value of stack pointer and data symbols 
marking
+// the start and end of the stack region.  stackPointer is initialized to
+// stackHigh and grows downwards towards stackLow
+GlobalSymbol *stackPointer;
+DefinedData *stackLow;
+DefinedData *stackHigh;
+
+// __tls_base
+// Global that holds the address of the base of the current thread's
+// TLS block.
+GlobalSymbol *tlsBase;
+
+// __tls_size
+// Symbol whose value is the size of the TLS block.
+GlobalSymbol *tlsSize;
+
+// __tls_size
+// Symbol whose value is the alignment of the TLS block.
+GlobalSymbol *tlsAlign;
+
+// __data_end
+// Symbol marking the end of the data and bss.
+DefinedData *dataEnd;
+
+// __heap_base/__heap_end
+// Symbols marking the beginning and end of the "heap". It starts at the 
end
+// of the data, bss and explicit stack, and extends to the end of the 
linear
+// memory allocated by wasm-ld. This region of memory is not used by the
+// linked code, so it may be used as a backing store for `sbrk` or `malloc`
+// implementations.
+DefinedData *heapBase;
+DefinedData *heapEnd;
+
+// __wasm_init_memory_flag
+// Symbol whose contents are nonzero iff memory has already been
+// initialized.
+DefinedData *initMemoryFlag;
+
+// __wasm_init_memory
+// Function that initializes passive data segments during instantiation.
+DefinedFunction *initMemory;
+
+// __wasm_call_ctors
+// Function that directly calls all ctors in priority order.
+DefinedFunction *callCtors;
+
+// __wasm_call_dtors
+// Function that calls the libc/etc. cleanup function.
+DefinedFunction *callDtors;
+
+// __wasm_apply_global_relocs
+// Function that applies relocations to wasm globals post-instantiation.
+// Unlike __wasm_apply_data_relocs this needs to run on every thread.
+DefinedFunction *applyGlobalRelocs;
+
+// __wasm_apply_tls_relocs
+// Like __wasm_apply_data_relocs but for TLS section.  These must be
+// delayed until __wasm_init_tls.
+DefinedFunction *applyTLSRelocs;
+
+// __wasm_apply_global_tls_relocs
+// Like applyGlobalRelocs but for globals that hold TLS addresses.  These
+// must be delayed until __wasm_init_tls.
+DefinedFunction *applyGlobalTLSRelocs;
+
+// __wasm_init_tls
+// Function that allocates thread-local storage and initializes it.
+DefinedFunction *initTLS;
+
+// Pointer to the function

[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/137620
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] 72ad9be - [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


Author: Losy001
Date: 2025-05-10T13:05:55-07:00
New Revision: 72ad9be1e337f487c9db4dd634005d09f7bf2790

URL: 
https://github.com/llvm/llvm-project/commit/72ad9be1e337f487c9db4dd634005d09f7bf2790
DIFF: 
https://github.com/llvm/llvm-project/commit/72ad9be1e337f487c9db4dd634005d09f7bf2790.diff

LOG: [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType 
(#134930)

This pull request implements mangling for ConstantMatrixType, allowing
matrices to be used on Windows.

Related issues: #53158, #127127

This example code:
```cpp
#include <typeinfo>
#include <cstdio>

typedef float Matrix4 __attribute__((matrix_type(4, 4)));

int main()
{
  printf("%s\n", typeid(Matrix4).name());
}
```
Outputs this:
```
struct __clang::__matrix<float,4,4>
```

(cherry picked from commit f5a30f111dc4ad6422863722eb708059a68a9d5c)

Added: 
clang/test/CodeGenCXX/mangle-ms-matrix.cpp

Modified: 
clang/lib/AST/MicrosoftMangle.cpp

Removed: 




diff  --git a/clang/lib/AST/MicrosoftMangle.cpp 
b/clang/lib/AST/MicrosoftMangle.cpp
index 42b735ccf4a2c..74c995f2f97f0 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -3552,7 +3552,22 @@ void MicrosoftCXXNameMangler::mangleType(const 
DependentSizedExtVectorType *T,
 
 void MicrosoftCXXNameMangler::mangleType(const ConstantMatrixType *T,
  Qualifiers quals, SourceRange Range) {
-  Error(Range.getBegin(), "matrix type") << Range;
+  QualType EltTy = T->getElementType();
+  const BuiltinType *ET = EltTy->getAs();
+
+  llvm::SmallString<64> TemplateMangling;
+  llvm::raw_svector_ostream Stream(TemplateMangling);
+  MicrosoftCXXNameMangler Extra(Context, Stream);
+
+  Stream << "?$";
+
+  Extra.mangleSourceName("__matrix");
+  Extra.mangleType(EltTy, Range, QMM_Escape);
+
+  Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumRows()));
+  Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumColumns()));
+
+  mangleArtificialTagType(TagTypeKind::Struct, TemplateMangling, {"__clang"});
 }
 
 void MicrosoftCXXNameMangler::mangleType(const DependentSizedMatrixType *T,

diff  --git a/clang/test/CodeGenCXX/mangle-ms-matrix.cpp 
b/clang/test/CodeGenCXX/mangle-ms-matrix.cpp
new file mode 100644
index 0..b244aa6e33cfa
--- /dev/null
+++ b/clang/test/CodeGenCXX/mangle-ms-matrix.cpp
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -fenable-matrix -fms-extensions -fcxx-exceptions 
-ffreestanding -target-feature +avx -emit-llvm %s -o - -triple=i686-pc-win32 | 
FileCheck %s
+// RUN: %clang_cc1 -fenable-matrix -fms-extensions -fcxx-exceptions 
-ffreestanding -target-feature +avx -emit-llvm %s -o - -triple=i686-pc-win32 
-fexperimental-new-constant-interpreter | FileCheck %s
+
+typedef float __attribute__((matrix_type(4, 4))) m4x4f;
+typedef float __attribute__((matrix_type(2, 2))) m2x2f;
+
+typedef int __attribute__((matrix_type(4, 4))) m4x4i;
+typedef int __attribute__((matrix_type(2, 2))) m2x2i;
+
+void thow(int i) {
+  switch (i) {
+case 0: throw m4x4f();
+// CHECK: ??_R0U?$__matrix@M$03$03@__clang@@@8
+// CHECK: _CT??_R0U?$__matrix@M$03$03@__clang@@@864
+// CHECK: _CTA1U?$__matrix@M$03$03@__clang@@
+// CHECK: _TI1U?$__matrix@M$03$03@__clang@@
+case 1: throw m2x2f();
+// CHECK: ??_R0U?$__matrix@M$01$01@__clang@@@8
+// CHECK: _CT??_R0U?$__matrix@M$01$01@__clang@@@816
+// CHECK: _CTA1U?$__matrix@M$01$01@__clang@@
+// CHECK: _TI1U?$__matrix@M$01$01@__clang@@
+case 2: throw m4x4i();
+// CHECK: ??_R0U?$__matrix@H$03$03@__clang@@@8
+// CHECK: _CT??_R0U?$__matrix@H$03$03@__clang@@@864
+// CHECK: _CTA1U?$__matrix@H$03$03@__clang@@
+// CHECK: _TI1U?$__matrix@H$03$03@__clang@@
+case 3: throw m2x2i();
+// CHECK: ??_R0U?$__matrix@H$01$01@__clang@@@8
+// CHECK: _CT??_R0U?$__matrix@H$01$01@__clang@@@816
+// CHECK: _CTA1U?$__matrix@H$01$01@__clang@@
+// CHECK: _TI1U?$__matrix@H$01$01@__clang@@
+  }
+}
+
+void foo44f(m4x4f) {}
+// CHECK: define dso_local void @"?foo44f@@YAXU?$__matrix@M$03$03@__clang@@@Z"
+
+m4x4f rfoo44f() { return m4x4f(); }
+// CHECK: define dso_local noundef <16 x float> 
@"?rfoo44f@@YAU?$__matrix@M$03$03@__clang@@XZ"
+
+void foo22f(m2x2f) {}
+// CHECK: define dso_local void @"?foo22f@@YAXU?$__matrix@M$01$01@__clang@@@Z"
+
+m2x2f rfoo22f() { return m2x2f(); }
+// CHECK: define dso_local noundef <4 x float> 
@"?rfoo22f@@YAU?$__matrix@M$01$01@__clang@@XZ"
+
+void foo44i(m4x4i) {}
+// CHECK: define dso_local void @"?foo44i@@YAXU?$__matrix@H$03$03@__clang@@@Z"
+
+m4x4i rfoo44i() { return m4x4i(); }
+// CHECK: define dso_local noundef <16 x i32> 
@"?rfoo44i@@YAU?$__matrix@H$03$03@__clang@@XZ"
+
+void foo22i(m2x2i) {}
+// CHECK: define dso_local void @"?foo22i@@YAXU?$__matrix@H$01$01@__clang@@@Z"
+
+m2x2i rfoo22i() { return m2x2i(); }
+// CHECK: define dso_local noundef <4 x i32> 
@"?rfoo22i@@YAU?$__matrix@H$01$01@__clang@@XZ"
\ No newline at end of file

[llvm-branch-commits] [clang] 41c36d9 - [clang] Fix unused variable warning in MS mangler from constant matrix patch

2025-05-10 Thread Tom Stellard via llvm-branch-commits


Author: Reid Kleckner
Date: 2025-05-10T13:05:55-07:00
New Revision: 41c36d94080488cc938b1c1697c7e8353405cd75

URL: 
https://github.com/llvm/llvm-project/commit/41c36d94080488cc938b1c1697c7e8353405cd75
DIFF: 
https://github.com/llvm/llvm-project/commit/41c36d94080488cc938b1c1697c7e8353405cd75.diff

LOG: [clang] Fix unused variable warning in MS mangler from constant matrix 
patch

(cherry picked from commit ccdd55c518277d749eff878ffcb5ca3de55c2a60)

Added: 


Modified: 
clang/lib/AST/MicrosoftMangle.cpp

Removed: 




diff  --git a/clang/lib/AST/MicrosoftMangle.cpp 
b/clang/lib/AST/MicrosoftMangle.cpp
index 74c995f2f97f0..cb35dbd611204 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -3553,7 +3553,6 @@ void MicrosoftCXXNameMangler::mangleType(const 
DependentSizedExtVectorType *T,
 void MicrosoftCXXNameMangler::mangleType(const ConstantMatrixType *T,
  Qualifiers quals, SourceRange Range) {
   QualType EltTy = T->getElementType();
-  const BuiltinType *ET = EltTy->getAs();
 
   llvm::SmallString<64> TemplateMangling;
   llvm::raw_svector_ostream Stream(TemplateMangling);



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [SelectionDAGBuilder] Use address width when lowering ptrtoaddr (PR #139423)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson created 
https://github.com/llvm/llvm-project/pull/139423

Instead of just deferring to ptrtoint, we should truncate to the index
width and then perform the ZextOrTrunc.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [SelectionDAGBuilder] Use address width when lowering ptrtoaddr (PR #139423)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:



@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-selectiondag

Author: Alexander Richardson (arichardson)


Changes

Instead of just deferring to ptrtoint, we should truncate to the index
width and then perform the ZextOrTrunc.


---
Full diff: https://github.com/llvm/llvm-project/pull/139423.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+14-1) 
- (modified) llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll (+5-1) 


``diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e6651d000bd71..806bab5379bde 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3878,7 +3878,20 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) {
 }
 
 void SelectionDAGBuilder::visitPtrToAddr(const User &I) {
-  visitPtrToInt(I);
+  const auto &TLI = DAG.getTargetLoweringInfo();
+  const DataLayout &DL = DAG.getDataLayout();
+  LLVMContext &Ctx = *DAG.getContext();
+  // ptrtoaddr is equivalent to a truncate of ptrtoint to address/index width
+  SDValue N = getValue(I.getOperand(0));
+  Type *PtrTy = I.getOperand(0)->getType();
+  EVT AddrVT = EVT::getIntegerVT(Ctx, DL.getPointerAddressSizeInBits(PtrTy));
+  if (auto *VTy = dyn_cast(PtrTy)) {
+Type *EltTy = VTy->getElementType();
+AddrVT = EVT::getVectorVT(Ctx, AddrVT, VTy->getElementCount());
+  }
+  N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), AddrVT);
+  N = DAG.getZExtOrTrunc(N, getCurSDLoc(), TLI.getValueType(DL, I.getType()));
+  setValue(&I, N);
 }
 
 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
diff --git a/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll 
b/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll
index 32b5d9441b61c..da4b531ab5b25 100644
--- a/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll
@@ -32,8 +32,9 @@ define <2 x i64> @ptrtoaddr_vec(<2 x ptr addrspace(8)> %ptr) {
 ; CHECK-LABEL: ptrtoaddr_vec:
 ; CHECK:   ; %bb.0:
 ; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:v_mov_b32_e32 v3, v5
 ; CHECK-NEXT:v_mov_b32_e32 v2, v4
+; CHECK-NEXT:v_and_b32_e32 v1, 0x, v1
+; CHECK-NEXT:v_and_b32_e32 v3, 0x, v5
 ; CHECK-NEXT:s_setpc_b64 s[30:31]
   %ret = ptrtoaddr <2 x ptr addrspace(8)> %ptr to <2 x i64>
   ret <2 x i64> %ret
@@ -57,6 +58,9 @@ define i128 @ptrtoaddr_ext(ptr addrspace(8) %ptr) {
 ; CHECK-LABEL: ptrtoaddr_ext:
 ; CHECK:   ; %bb.0:
 ; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:v_and_b32_e32 v1, 0x, v1
+; CHECK-NEXT:v_mov_b32_e32 v2, 0
+; CHECK-NEXT:v_mov_b32_e32 v3, 0
 ; CHECK-NEXT:s_setpc_b64 s[30:31]
   %ret = ptrtoaddr ptr addrspace(8) %ptr to i128
   ret i128 %ret

``




https://github.com/llvm/llvm-project/pull/139423
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [SelectionDAGBuilder] Use address width when lowering ptrtoaddr (PR #139423)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson updated 
https://github.com/llvm/llvm-project/pull/139423

>From d4520dc202a500c80309cf09517b2a2823bf13ab Mon Sep 17 00:00:00 2001
From: Alex Richardson 
Date: Sat, 10 May 2025 17:33:01 -0700
Subject: [PATCH] remove unused variable

Created using spr 1.3.6-beta.1
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 806bab5379bde..66b11030ce5a5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3885,10 +3885,8 @@ void SelectionDAGBuilder::visitPtrToAddr(const User &I) {
   SDValue N = getValue(I.getOperand(0));
   Type *PtrTy = I.getOperand(0)->getType();
   EVT AddrVT = EVT::getIntegerVT(Ctx, DL.getPointerAddressSizeInBits(PtrTy));
-  if (auto *VTy = dyn_cast(PtrTy)) {
-Type *EltTy = VTy->getElementType();
+  if (auto *VTy = dyn_cast(PtrTy))
 AddrVT = EVT::getVectorVT(Ctx, AddrVT, VTy->getElementCount());
-  }
   N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), AddrVT);
   N = DAG.getZExtOrTrunc(N, getCurSDLoc(), TLI.getValueType(DL, I.getType()));
   setValue(&I, N);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [IR] Introduce the `ptrtoaddr` instruction (PR #139357)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson updated 
https://github.com/llvm/llvm-project/pull/139357

>From 25dc175562349410f161ef0e80246301d9a7ba79 Mon Sep 17 00:00:00 2001
From: Alex Richardson 
Date: Fri, 9 May 2025 22:43:37 -0700
Subject: [PATCH] fix docs build

Created using spr 1.3.6-beta.1
---
 llvm/docs/LangRef.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2d18d0d97aaee..38be6918ff73c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -12435,7 +12435,7 @@ Example:
 .. _i_ptrtoaddr:
 
 '``ptrtoaddr .. to``' Instruction
-
+^
 
 Syntax:
 """

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-llvm-selectiondag

Author: Alexander Richardson (arichardson)


Changes

Of the 128 bits of the buffer descriptor, only 48 bits are address bits, so
following the discussion on 
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54,
the logical conclusion is to set the index width to 48 bits instead of
the current value of 128.

Most of the test changes are mechanical datalayout updates, but there
is one actual change: the ptrmask test now uses .i48 instead of .i128
and I had to update SelectionDAGBuilder to correctly extend the mask.


---

Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/139419.diff


34 Files Affected:

- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) 
- (modified) clang/test/CodeGen/target-data.c (+2-2) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) 
- (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) 
- (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) 
- (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) 
- (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll 
(+1-1) 
- (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) 
- (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) 
- (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) 
- (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) 
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) 


``diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index c368200f3f739..056a3d6579fa5 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 =
 
 static const char *const DataLayoutStringAMDGCN =
 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
-"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
-"32-v48:64-v96:128"
-"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
-"-ni:7:8:9";
+"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
+
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-"
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 9cb00e8ee73d3..41a3f59b0fc81 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -176,12 +176,12 @@
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm 
%s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v3

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: Alexander Richardson (arichardson)


Changes

Of the 128 bits of the buffer descriptor, only 48 bits are address bits, so
following the discussion on 
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54,
the logical conclusion is to set the index width to 48 bits instead of
the current value of 128.

Most of the test changes are mechanical datalayout updates, but there
is one actual change: the ptrmask test now uses .i48 instead of .i128
and I had to update SelectionDAGBuilder to correctly extend the mask.


---

Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/139419.diff


34 Files Affected:

- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) 
- (modified) clang/test/CodeGen/target-data.c (+2-2) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) 
- (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) 
- (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) 
- (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) 
- (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll 
(+1-1) 
- (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) 
- (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) 
- (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) 
- (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) 
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) 


``diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index c368200f3f739..056a3d6579fa5 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 =
 
 static const char *const DataLayoutStringAMDGCN =
 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
-"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
-"32-v48:64-v96:128"
-"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
-"-ni:7:8:9";
+"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
+
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-"
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 9cb00e8ee73d3..41a3f59b0fc81 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -176,12 +176,12 @@
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm 
%s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:6

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:



@llvm/pr-subscribers-clang

@llvm/pr-subscribers-llvm-transforms

Author: Alexander Richardson (arichardson)


Changes

Of the 128 bits of the buffer descriptor, only 48 bits are address bits, so
following the discussion on 
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54,
the logical conclusion is to set the index width to 48 bits instead of
the current value of 128.

Most of the test changes are mechanical datalayout updates, but there
is one actual change: the ptrmask test now uses .i48 instead of .i128
and I had to update SelectionDAGBuilder to correctly extend the mask.


---

Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/139419.diff


34 Files Affected:

- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) 
- (modified) clang/test/CodeGen/target-data.c (+2-2) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) 
- (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) 
- (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) 
- (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) 
- (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll 
(+1-1) 
- (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) 
- (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) 
- (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) 
- (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) 
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) 


``diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index c368200f3f739..056a3d6579fa5 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 =
 
 static const char *const DataLayoutStringAMDGCN =
 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
-"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
-"32-v48:64-v96:128"
-"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
-"-ni:7:8:9";
+"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
+
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-"
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 9cb00e8ee73d3..41a3f59b0fc81 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -176,12 +176,12 @@
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm 
%s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:3

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Alexander Richardson (arichardson)


Changes

Of the 128 bits of the buffer descriptor, only 48 bits are address bits, so
following the discussion on 
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54,
the logical conclusion is to set the index width to 48 bits instead of
the current value of 128.

Most of the test changes are mechanical datalayout updates, but there
is one actual change: the ptrmask test now uses .i48 instead of .i128
and I had to update SelectionDAGBuilder to correctly extend the mask.


---

Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/139419.diff


34 Files Affected:

- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) 
- (modified) clang/test/CodeGen/target-data.c (+2-2) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) 
- (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) 
- (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) 
- (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) 
- (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll 
(+1-1) 
- (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) 
- (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) 
- (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) 
- (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) 
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) 


``diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index c368200f3f739..056a3d6579fa5 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 =
 
 static const char *const DataLayoutStringAMDGCN =
 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
-"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
-"32-v48:64-v96:128"
-"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
-"-ni:7:8:9";
+"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
+
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-"
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 9cb00e8ee73d3..41a3f59b0fc81 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -176,12 +176,12 @@
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm 
%s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:3

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-llvm-globalisel

Author: Alexander Richardson (arichardson)


Changes

Of the 128 bits of a buffer descriptor only 48 bits are address bits, so
following the discussion on 
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54,
the logical conclusion is to set the index width to 48 bits instead of
the current value of 128.

Most of the test changes are mechanical datalayout updates, but there
is one actual change: the ptrmask test now uses .i48 instead of .i128
and I had to update SelectionDAGBuilder to correctly extend the mask.


---

Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/139419.diff


34 Files Affected:

- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) 
- (modified) clang/test/CodeGen/target-data.c (+2-2) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) 
- (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) 
- (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) 
- (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) 
- (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll 
(+1-1) 
- (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) 
- (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) 
- (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) 
- (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) 
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) 


``diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index c368200f3f739..056a3d6579fa5 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 =
 
 static const char *const DataLayoutStringAMDGCN =
 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
-"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
-"32-v48:64-v96:128"
-"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
-"-ni:7:8:9";
+"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
+
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-"
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 9cb00e8ee73d3..41a3f59b0fc81 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -176,12 +176,12 @@
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm 
%s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: Alexander Richardson (arichardson)


Changes

Of the 128 bits of a buffer descriptor only 48 bits are address bits, so
following the discussion on 
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54,
the logical conclusion is to set the index width to 48 bits instead of
the current value of 128.

Most of the test changes are mechanical datalayout updates, but there
is one actual change: the ptrmask test now uses .i48 instead of .i128
and I had to update SelectionDAGBuilder to correctly extend the mask.


---
Full diff: https://github.com/llvm/llvm-project/pull/139419.diff


34 Files Affected:

- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) 
- (modified) clang/test/CodeGen/target-data.c (+2-2) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) 
- (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) 
- (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) 
- (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) 
- (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll 
(+1-1) 
- (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) 
- (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) 
- (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) 
- (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) 
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) 


``diff


  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-llvm-analysis

Author: Alexander Richardson (arichardson)


Changes

Of the 128 bits of a buffer descriptor only 48 bits are address bits, so
following the discussion on 
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54,
the logical conclusion is to set the index width to 48 bits instead of
the current value of 128.

Most of the test changes are mechanical datalayout updates, but there
is one actual change: the ptrmask test now uses .i48 instead of .i128
and I had to update SelectionDAGBuilder to correctly extend the mask.


---

Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/139419.diff


34 Files Affected:

- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) 
- (modified) clang/test/CodeGen/target-data.c (+2-2) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) 
- (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) 
- (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) 
- (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) 
- (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll 
(+1-1) 
- (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) 
- (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) 
- (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) 
- (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) 
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) 


``diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index c368200f3f739..056a3d6579fa5 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 =
 
 static const char *const DataLayoutStringAMDGCN =
 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
-"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
-"32-v48:64-v96:128"
-"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
-"-ni:7:8:9";
+"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
+
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-"
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 9cb00e8ee73d3..41a3f59b0fc81 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -176,12 +176,12 @@
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm 
%s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits



@@ -7970,17 +7970,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const 
CallInst &I,
 
 // On arm64_32, pointers are 32 bits when stored in memory, but
 // zero-extended to 64 bits when in registers.  Thus the mask is 32 bits to
-// match the index type, but the pointer is 64 bits, so the the mask must 
be
+// match the index type, but the pointer is 64 bits, so the mask must be
 // zero-extended up to 64 bits to match the pointer.
 EVT PtrVT =
 TLI.getValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
 EVT MemVT =
 TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
 assert(PtrVT == Ptr.getValueType());
-assert(MemVT == Mask.getValueType());
-if (MemVT != PtrVT)
+if (Mask.getValueType().getFixedSizeInBits() < MemVT.getFixedSizeInBits()) 
{
+  // For AMDGPU buffer descriptors the mask is 48 bits, but the pointer is
+  // 128-bit, so we have to pad the mask with ones for unused bits.
+  auto HighOnes =
+  DAG.getNode(ISD::SHL, sdl, PtrVT, DAG.getAllOnesConstant(sdl, PtrVT),
+  DAG.getConstant(Mask.getValueType().getFixedSizeInBits(),
+  sdl, PtrVT));
+  Mask = DAG.getNode(ISD::OR, sdl, PtrVT,
+ DAG.getZExtOrTrunc(Mask, sdl, PtrVT), HighOnes);

arichardson wrote:

Not sure if there is an easier way to one-pad the mask argument but this 
_should_ do the right thing.

https://github.com/llvm/llvm-project/pull/139419
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson created 
https://github.com/llvm/llvm-project/pull/139419

Of the 128 bits of a buffer descriptor only 48 bits are address bits, so
following the discussion on 
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54,
the logical conclusion is to set the index width to 48 bits instead of
the current value of 128.

Most of the test changes are mechanical datalayout updates, but there
is one actual change: the ptrmask test now uses .i48 instead of .i128
and I had to update SelectionDAGBuilder to correctly extend the mask.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson updated 
https://github.com/llvm/llvm-project/pull/139413



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson updated 
https://github.com/llvm/llvm-project/pull/139413



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits



@@ -145,79 +145,79 @@ define amdgpu_ps ptr addrspace(7) 
@s_ptrmask_buffer_fat_ptr_i32_neg8(ptr addrspa
   ret ptr addrspace(7) %masked
 }
 
-define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128(ptr 
addrspace(8) %ptr, i128 %mask) {
-; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128:
+define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i48(ptr 
addrspace(8) %ptr, i48 %mask) {
+; GCN-LABEL: v_ptrmask_buffer_resource_variable_i48:
 ; GCN:   ; %bb.0:
 ; GCN-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:v_or_b32_e32 v5, 0xffff0000, v5
 ; GCN-NEXT:v_and_b32_e32 v1, v1, v5
 ; GCN-NEXT:v_and_b32_e32 v0, v0, v4
-; GCN-NEXT:v_and_b32_e32 v3, v3, v7
-; GCN-NEXT:v_and_b32_e32 v2, v2, v6
 ; GCN-NEXT:s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128:
+; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i48:
 ; GFX10PLUS:   ; %bb.0:
 ; GFX10PLUS-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT:v_or_b32_e32 v5, 0xffff0000, v5
 ; GFX10PLUS-NEXT:v_and_b32_e32 v0, v0, v4
 ; GFX10PLUS-NEXT:v_and_b32_e32 v1, v1, v5
-; GFX10PLUS-NEXT:v_and_b32_e32 v2, v2, v6
-; GFX10PLUS-NEXT:v_and_b32_e32 v3, v3, v7
 ; GFX10PLUS-NEXT:s_setpc_b64 s[30:31]
-  %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, 
i128 %mask)
+  %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, 
i48 %mask)
   ret ptr addrspace(8) %masked
 }
 
-define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128_neg8(ptr 
addrspace(8) %ptr) {
-; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
+define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i48_neg8(ptr 
addrspace(8) %ptr) {
+; GCN-LABEL: v_ptrmask_buffer_resource_variable_i48_neg8:
 ; GCN:   ; %bb.0:
 ; GCN-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:v_and_b32_e32 v0, -8, v0
 ; GCN-NEXT:s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
+; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i48_neg8:
 ; GFX10PLUS:   ; %bb.0:
 ; GFX10PLUS-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10PLUS-NEXT:v_and_b32_e32 v0, -8, v0
 ; GFX10PLUS-NEXT:s_setpc_b64 s[30:31]
-  %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, 
i128 -8)
+  %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, 
i48 -8)
   ret ptr addrspace(8) %masked
 }
 
-define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128(ptr 
addrspace(8) inreg %ptr, i128 inreg %mask) {
-; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128:
+define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i48(ptr 
addrspace(8) inreg %ptr, i48 inreg %mask) {
+; GCN-LABEL: s_ptrmask_buffer_resource_variable_i48:
 ; GCN:   ; %bb.0:
-; GCN-NEXT:s_and_b64 s[4:5], s[4:5], s[8:9]
+; GCN-NEXT:s_or_b32 s7, s7, 0xffff0000

arichardson wrote:

I can't read AMDGPU assembly properly, so it would be good to double-check that 
this is correct.

https://github.com/llvm/llvm-project/pull/139419
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/125432

>From 9fe563bd5347ede6a08e12202f23f49b20b7b64f Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 31 Jan 2025 13:12:56 -0500
Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic
 vector.

After splitting, all elements are created. The two components must
be found by looking at the upper and lower half of EXTRACT_ELEMENT.
This change extends EltsFromConsecutiveLoads
to understand AtomicSDNode so that unused elements can be removed.

commit-id:b83937a8
---
 llvm/include/llvm/CodeGen/SelectionDAG.h  |   4 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  20 ++-
 .../SelectionDAGAddressAnalysis.cpp   |  30 ++--
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  59 +--
 llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++
 5 files changed, 90 insertions(+), 172 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 87b6914f8a0ee..ab8bb517e6ae4 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1873,7 +1873,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will 
have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
@@ -2311,7 +2311,7 @@ class SelectionDAG {
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
   /// location that the 'Base' load is loading from.
-  bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
+  bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
   unsigned Bytes, int Dist) const;
 
   /// Infer alignment of a load / store address. Return std::nullopt if it
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bbf1b0fd590ef..38b22078c8c44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12215,7 +12215,7 @@ SDValue 
SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
SDValue NewMemOp) {
   assert(isa(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
@@ -12905,17 +12905,21 @@ std::pair 
SelectionDAG::UnrollVectorOverflowOp(
 getBuildVector(NewOvVT, dl, OvScalars));
 }
 
-bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
-  LoadSDNode *Base,
+bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
+  MemSDNode *Base,
   unsigned Bytes,
   int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
 return false;
-  // TODO: probably too restrictive for atomics, revisit
-  if (!LD->isSimple())
-return false;
-  if (LD->isIndexed() || Base->isIndexed())
-return false;
+  if (auto Ld = dyn_cast(LD)) {
+if (!Ld->isSimple())
+  return false;
+if (Ld->isIndexed())
+  return false;
+  }
+  if (auto Ld = dyn_cast(Base))
+if (Ld->isIndexed())
+  return false;
   if (LD->getChain() != Base->getChain())
 return false;
   EVT VT = LD->getMemoryVT();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..c29cb424c7a4c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, 
int64_t BitSize,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
-   const SelectionDAG &DAG) {
+template 
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
   bool IsIndexSignExt = false;
 
   // pre-inc/pre-dec ops are components of EA.
-  if (N->getAddressingMode() == ISD::PRE_INC) {
-if (auto *C = dyn_cast(N->getOffset()))

[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120716

>From e1eaeb6114c9b4a7b432d2655e699b2f7558e824 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 20 Dec 2024 06:14:28 -0500
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector

AtomicExpand fails for aligned `load atomic <n x T>` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.

commit-id:f430c1af
---
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 -
 llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 30 +
 .../X86/expand-atomic-non-integer.ll  | 65 +++
 4 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index c376de877ac7d..70f59eafc6ecb 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
 I->replaceAllUsesWith(V);
   } else if (HasResult) {
 Value *V;
-if (UseSizedLibcall)
-  V = Builder.CreateBitOrPointerCast(Result, I->getType());
-else {
+if (UseSizedLibcall) {
+  // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
+  auto *PtrTy = dyn_cast(I->getType()->getScalarType());
+  auto *VTy = dyn_cast(I->getType());
+  if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
+unsigned AS = PtrTy->getAddressSpace();
+Value *BC = Builder.CreateBitCast(
+Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
+V = Builder.CreateIntToPtr(BC, I->getType());
+  } else
+V = Builder.CreateBitOrPointerCast(Result, I->getType());
+} else {
   V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
 AllocaAlignment);
   Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..eaa2ffd9b2731 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double 
%val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:   @ %bb.0:
+; ARM-NEXT:ldr r0, [r0]
+; ARM-NEXT:dmb ish
+; ARM-NEXT:bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:   @ %bb.0:
+; ARMOPTNONE-NEXT:ldr r0, [r0]
+; ARMOPTNONE-NEXT:dmb ish
+; ARMOPTNONE-NEXT:bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:   @ %bb.0:
+; THUMBTWO-NEXT:ldr r0, [r0]
+; THUMBTWO-NEXT:dmb ish
+; THUMBTWO-NEXT:bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:   @ %bb.0:
+; THUMBONE-NEXT:push {r7, lr}
+; THUMBONE-NEXT:movs r1, #0
+; THUMBONE-NEXT:mov r2, r1
+; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:   @ %bb.0:
+; ARMV4-NEXT:push {r11, lr}
+; ARMV4-NEXT:mov r1, #2
+; ARMV4-NEXT:bl __atomic_load_4
+; ARMV4-NEXT:pop {r11, lr}
+; ARMV4-NEXT:mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:   @ %bb.0:
+; ARMV6-NEXT:ldr r0, [r0]
+; ARMV6-NEXT:mov r1, #0
+; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:   @ %bb.0:
+; THUMBM-NEXT:ldr r0, [r0]
+; THUMBM-NEXT:dmb sy
+; THUMBM-NEXT:bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index f72970d12b6eb..d3027e799 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -382,6 +382,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_ptr_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:popq %rax
+; CHECK-NEXT:retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
 ; CHECK3-LABEL: atomic_vec4_i8:
 ; CHECK3:   ## %bb.0:
@@ -405,6 +420,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
   ret <4 x i16> %ret
 }
 
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_ptr270:
+; CHECK:   ## %b

[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120716

>From e1eaeb6114c9b4a7b432d2655e699b2f7558e824 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 20 Dec 2024 06:14:28 -0500
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector

AtomicExpand fails for aligned `load atomic <n x T>` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.

commit-id:f430c1af
---
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 -
 llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 30 +
 .../X86/expand-atomic-non-integer.ll  | 65 +++
 4 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index c376de877ac7d..70f59eafc6ecb 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
 I->replaceAllUsesWith(V);
   } else if (HasResult) {
 Value *V;
-if (UseSizedLibcall)
-  V = Builder.CreateBitOrPointerCast(Result, I->getType());
-else {
+if (UseSizedLibcall) {
+  // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
+  auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType());
+  auto *VTy = dyn_cast<VectorType>(I->getType());
+  if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
+unsigned AS = PtrTy->getAddressSpace();
+Value *BC = Builder.CreateBitCast(
+Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
+V = Builder.CreateIntToPtr(BC, I->getType());
+  } else
+V = Builder.CreateBitOrPointerCast(Result, I->getType());
+} else {
   V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
 AllocaAlignment);
   Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..eaa2ffd9b2731 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double 
%val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:   @ %bb.0:
+; ARM-NEXT:ldr r0, [r0]
+; ARM-NEXT:dmb ish
+; ARM-NEXT:bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:   @ %bb.0:
+; ARMOPTNONE-NEXT:ldr r0, [r0]
+; ARMOPTNONE-NEXT:dmb ish
+; ARMOPTNONE-NEXT:bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:   @ %bb.0:
+; THUMBTWO-NEXT:ldr r0, [r0]
+; THUMBTWO-NEXT:dmb ish
+; THUMBTWO-NEXT:bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:   @ %bb.0:
+; THUMBONE-NEXT:push {r7, lr}
+; THUMBONE-NEXT:movs r1, #0
+; THUMBONE-NEXT:mov r2, r1
+; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:   @ %bb.0:
+; ARMV4-NEXT:push {r11, lr}
+; ARMV4-NEXT:mov r1, #2
+; ARMV4-NEXT:bl __atomic_load_4
+; ARMV4-NEXT:pop {r11, lr}
+; ARMV4-NEXT:mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:   @ %bb.0:
+; ARMV6-NEXT:ldr r0, [r0]
+; ARMV6-NEXT:mov r1, #0
+; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:   @ %bb.0:
+; THUMBM-NEXT:ldr r0, [r0]
+; THUMBM-NEXT:dmb sy
+; THUMBM-NEXT:bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index f72970d12b6eb..d3027e799 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -382,6 +382,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_ptr_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:popq %rax
+; CHECK-NEXT:retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
 ; CHECK3-LABEL: atomic_vec4_i8:
 ; CHECK3:   ## %bb.0:
@@ -405,6 +420,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
   ret <4 x i16> %ret
 }
 
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_ptr270:
+; CHECK:   ## %b

[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120598

>From c8fe66e31bdd3bbf5beeb6096e8e4ddaba12dcf6 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 11:19:39 -0500
Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load

Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size.

commit-id:2894ccd1
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  | 97 ++-
 llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++
 3 files changed, 153 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 89ea7ef4dbe89..bdfa5f7741ad3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8eee7a4c61fe6..f88b4d5693979 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, 
unsigned ResNo) {
 break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+break;
   case ISD::LOAD:  Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -6014,6 +6017,74 @@ SDValue 
DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  N->getOperand(1), N->getOperand(2));
 }
 
+/// Either return the same load or provide appropriate casts
+/// from the load and return that.
+static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+ TypeSize LdWidth, TypeSize FirstVTWidth,
+ SDLoc dl, SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+unsigned NumElts =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  }
+  assert(FirstVT == WidenVT);
+  return LdOp;
+}
+
+static std::optional<EVT> findMemType(SelectionDAG &DAG,
+  const TargetLowering &TLI, unsigned 
Width,
+  EVT WidenVT, unsigned Align,
+  unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+  TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
+ "Must be scalable");
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
+ "Expected equivalent element types");
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+
+  // Find the vector type that can load from.
+  std::optional<EVT> FirstVT =
+  findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
+  WidthDiff.getKnownMinValue());
+
+  if (!FirstVT)
+return SDValue();
+
+  SmallVector MemVTs;
+  TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+  SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
+   Chain, BasePtr, LD->getMemOperand());
+
+  // Load the element with one instruction.
+  SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
+ FirstVTWidth, dl, DAG);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new

[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120640

>From eda6b72faabf12c032b87358f75e1627fe10e221 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 16:25:55 -0500
Subject: [PATCH] [SelectionDAG] Split vector types for atomic load

Vector types that aren't widened are split
so that a single ATOMIC_LOAD is issued for the entire vector at once.
This change utilizes the load vectorization infrastructure in
SelectionDAG in order to group the vectors. This enables SelectionDAG
to translate vectors with type bfloat,half.

commit-id:3a045357
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  37 
 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++
 3 files changed, 209 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bdfa5f7741ad3..d8f402f529632 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f88b4d5693979..a3b30943c8e7d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SplitVecRes_STEP_VECTOR(N, Lo, Hi);
 break;
   case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::ATOMIC_LOAD:
+SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi);
+break;
   case ISD::LOAD:
 SplitVecRes_LOAD(cast(N), Lo, Hi);
 break;
@@ -1421,6 +1424,40 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SetSplitVector(SDValue(N, ResNo), Lo, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
+   SDValue &Hi) {
+  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ "Extended load during type legalization!");
+  SDLoc dl(LD);
+  EVT VT = LD->getValueType(0);
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+  SDValue Ch = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+
+  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+  EVT MemIntVT =
+  EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits());
+  SDValue ALD = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, MemIntVT, IntVT, Ch,
+  Ptr, LD->getMemOperand());
+
+  EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+  EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+  SDValue ExtractLo = DAG.getNode(ISD::TRUNCATE, dl, LoIntVT, ALD);
+  SDValue ExtractHi =
+  DAG.getNode(ISD::SRL, dl, IntVT, ALD,
+  DAG.getIntPtrConstant(VT.getSizeInBits() / 2, dl));
+  ExtractHi = DAG.getNode(ISD::TRUNCATE, dl, HiIntVT, ExtractHi);
+
+  Lo = DAG.getBitcast(LoVT, ExtractLo);
+  Hi = DAG.getBitcast(HiVT, ExtractHi);
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1));
+}
+
 void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
 MachinePointerInfo &MPI, SDValue &Ptr,
 uint64_t *ScaledOffset) {
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3cf9e3c1a8dfa..6e2e9d4b21891 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -205,6 +205,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
   ret <2 x float> %ret
 }
 
+define <2 x half> @atomic_vec2_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec2_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:shrl $16, %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm1
+; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movl (%rdi), %eax
+; CHECK0-NEXT:movl %eax, %ecx
+; CHECK0-NEXT:shrl

[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120387

>From 63a3178d7d13c697e81900def3e706e450ef3437 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:40:32 -0500
Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes.

Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.

commit-id:a06a5cc6
---
 llvm/test/CodeGen/X86/atomic-load-store.ll | 253 +
 1 file changed, 253 insertions(+)

diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 6efcbb80c0ce6..39e9fdfa5e62b 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   ret <1 x i64> %ret
 }
 
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_ptr:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_ptr:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
+
 define <1 x half> @atomic_vec1_half(ptr %x) {
 ; CHECK3-LABEL: atomic_vec1_half:
 ; CHECK3:   ## %bb.0:
@@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) 
nounwind {
   %ret = load atomic <1 x double>, ptr %x acquire, align 8
   ret <1 x double> %ret
 }
+
+define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_i64:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i64:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+  ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_double:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_double:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_i32:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_i32:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+  ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_float_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[

[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120385

>From 8671aa64d3d17d769e3d0ae57ffb38dda7176193 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:37:17 -0500
Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load

`load atomic <1 x T>` is not valid. This change legalizes
vector types of atomic load via scalarization in SelectionDAG
so that it can, for example, translate from `v1i32` to `i32`.

commit-id:5c36cc8c
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  15 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +-
 3 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 720393158aa5e..89ea7ef4dbe89 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+  SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
   SDValue ScalarizeVecRes_VSELECT(SDNode *N);
   SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d0b69b88748a9..8eee7a4c61fe6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, 
unsigned ResNo) {
 R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
 break;
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+break;
   case ISD::LOAD:   R = 
ScalarizeVecRes_LOAD(cast(N));break;
   case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -458,6 +461,18 @@ SDValue 
DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
   return Op;
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+  SDValue Result = DAG.getAtomicLoad(
+  ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+  N->getValueType(0).getVectorElementType(), N->getChain(), 
N->getBasePtr(),
+  N->getMemOperand());
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
   assert(N->isUnindexed() && "Indexed vector load?");
 
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 5bce4401f7bdb..d23cfb89f9fc8 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s --check-prefixes=CHECK,CHECK3
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s --check-prefixes=CHECK,CHECK0
 
 define void @test1(ptr %ptr, i32 %val1) {
 ; CHECK-LABEL: test1:
@@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) {
   %val = load atomic i32, ptr %ptr seq_cst, align 4
   ret i32 %val
 }
+
+define <1 x i32> @atomic_vec1_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i32:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movl (%rdi), %eax
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+  ret <1 x i32> %ret
+}
+
+define <1 x i8> @atomic_vec1_i8(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i8:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzbl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i8:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movb (%rdi), %al
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+  ret <1 x i8> %ret
+}
+
+define <1 x i16> @atomic_vec1_i16(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i16:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i16:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %ax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+  ret <1 x i16> %ret
+}
+
+define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
+; CHECK3-LABEL: atomic_ve

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/125432

>From 9fe563bd5347ede6a08e12202f23f49b20b7b64f Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 31 Jan 2025 13:12:56 -0500
Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic
 vector.

After splitting, all elements are created. The two components must
be found by looking at the upper and lower half of EXTRACT_ELEMENT.
This change extends EltsFromConsecutiveLoads
to understand AtomicSDNode so that unused elements can be removed.

commit-id:b83937a8
---
 llvm/include/llvm/CodeGen/SelectionDAG.h  |   4 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  20 ++-
 .../SelectionDAGAddressAnalysis.cpp   |  30 ++--
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  59 +--
 llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++
 5 files changed, 90 insertions(+), 172 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 87b6914f8a0ee..ab8bb517e6ae4 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1873,7 +1873,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will 
have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
@@ -2311,7 +2311,7 @@ class SelectionDAG {
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
   /// location that the 'Base' load is loading from.
-  bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
+  bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
   unsigned Bytes, int Dist) const;
 
   /// Infer alignment of a load / store address. Return std::nullopt if it
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bbf1b0fd590ef..38b22078c8c44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12215,7 +12215,7 @@ SDValue 
SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
SDValue NewMemOp) {
   assert(isa(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
@@ -12905,17 +12905,21 @@ std::pair 
SelectionDAG::UnrollVectorOverflowOp(
 getBuildVector(NewOvVT, dl, OvScalars));
 }
 
-bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
-  LoadSDNode *Base,
+bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
+  MemSDNode *Base,
   unsigned Bytes,
   int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
 return false;
-  // TODO: probably too restrictive for atomics, revisit
-  if (!LD->isSimple())
-return false;
-  if (LD->isIndexed() || Base->isIndexed())
-return false;
+  if (auto Ld = dyn_cast(LD)) {
+if (!Ld->isSimple())
+  return false;
+if (Ld->isIndexed())
+  return false;
+  }
+  if (auto Ld = dyn_cast(Base))
+if (Ld->isIndexed())
+  return false;
   if (LD->getChain() != Base->getChain())
 return false;
   EVT VT = LD->getMemoryVT();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..c29cb424c7a4c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, 
int64_t BitSize,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
-   const SelectionDAG &DAG) {
+template <typename T>
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
   bool IsIndexSignExt = false;
 
   // pre-inc/pre-dec ops are components of EA.
-  if (N->getAddressingMode() == ISD::PRE_INC) {
-if (auto *C = dyn_cast(N->getOffset()))

[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120386

>From b24b74b999f530c75e41747bbb55736276b38852 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:38:23 -0500
Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG

When lowering atomic <1 x T> vector types with floats, selection can fail since
this pattern is unsupported. To support this, floats can be cast to
an integer type of the same size.

commit-id:f9d761c5
---
 llvm/lib/Target/X86/X86ISelLowering.cpp|  4 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++
 2 files changed, 41 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9f75fe8803cda..dfce56653af01 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const 
X86TargetMachine &TM,
 setOperationAction(Op, MVT::f32, Promote);
   }
 
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
+
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index d23cfb89f9fc8..6efcbb80c0ce6 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   %ret = load atomic <1 x i64>, ptr %x acquire, align 8
   ret <1 x i64> %ret
 }
+
+define <1 x half> @atomic_vec1_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %cx
+; CHECK0-NEXT:## implicit-def: $eax
+; CHECK0-NEXT:movw %cx, %ax
+; CHECK0-NEXT:## implicit-def: $xmm0
+; CHECK0-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x half>, ptr %x acquire, align 2
+  ret <1 x half> %ret
+}
+
+define <1 x float> @atomic_vec1_float(ptr %x) {
+; CHECK-LABEL: atomic_vec1_float:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x float>, ptr %x acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_double_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 8
+  ret <1 x double> %ret
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/125432

>From 9fe563bd5347ede6a08e12202f23f49b20b7b64f Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 31 Jan 2025 13:12:56 -0500
Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic
 vector.

After splitting, all elements are created. The two components must
be found by looking at the upper and lower half of EXTRACT_ELEMENT.
This change extends EltsFromConsecutiveLoads
to understand AtomicSDNode so that unused elements can be removed.

commit-id:b83937a8
---
 llvm/include/llvm/CodeGen/SelectionDAG.h  |   4 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  20 ++-
 .../SelectionDAGAddressAnalysis.cpp   |  30 ++--
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  59 +--
 llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++
 5 files changed, 90 insertions(+), 172 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 87b6914f8a0ee..ab8bb517e6ae4 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1873,7 +1873,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will 
have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
@@ -2311,7 +2311,7 @@ class SelectionDAG {
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
   /// location that the 'Base' load is loading from.
-  bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
+  bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
   unsigned Bytes, int Dist) const;
 
   /// Infer alignment of a load / store address. Return std::nullopt if it
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bbf1b0fd590ef..38b22078c8c44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12215,7 +12215,7 @@ SDValue 
SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
SDValue NewMemOp) {
   assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
@@ -12905,17 +12905,21 @@ std::pair<SDValue, SDValue> 
SelectionDAG::UnrollVectorOverflowOp(
 getBuildVector(NewOvVT, dl, OvScalars));
 }
 
-bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
-  LoadSDNode *Base,
+bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
+  MemSDNode *Base,
   unsigned Bytes,
   int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
 return false;
-  // TODO: probably too restrictive for atomics, revisit
-  if (!LD->isSimple())
-return false;
-  if (LD->isIndexed() || Base->isIndexed())
-return false;
+  if (auto Ld = dyn_cast<LoadSDNode>(LD)) {
+if (!Ld->isSimple())
+  return false;
+if (Ld->isIndexed())
+  return false;
+  }
+  if (auto Ld = dyn_cast<LoadSDNode>(Base))
+if (Ld->isIndexed())
+  return false;
   if (LD->getChain() != Base->getChain())
 return false;
   EVT VT = LD->getMemoryVT();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..c29cb424c7a4c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, 
int64_t BitSize,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
-   const SelectionDAG &DAG) {
+template <typename T>
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
   bool IsIndexSignExt = false;
 
   // pre-inc/pre-dec ops are components of EA.
-  if (N->getAddressingMode() == ISD::PRE_INC) {
-if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))

[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120598

>From c8fe66e31bdd3bbf5beeb6096e8e4ddaba12dcf6 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 11:19:39 -0500
Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load

Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size.

commit-id:2894ccd1
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  | 97 ++-
 llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++
 3 files changed, 153 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 89ea7ef4dbe89..bdfa5f7741ad3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8eee7a4c61fe6..f88b4d5693979 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, 
unsigned ResNo) {
 break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+break;
   case ISD::LOAD:  Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -6014,6 +6017,74 @@ SDValue 
DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  N->getOperand(1), N->getOperand(2));
 }
 
+/// Either return the same load or provide appropriate casts
+/// from the load and return that.
+static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+ TypeSize LdWidth, TypeSize FirstVTWidth,
+ SDLoc dl, SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+unsigned NumElts =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  }
+  assert(FirstVT == WidenVT);
+  return LdOp;
+}
+
+static std::optional<EVT> findMemType(SelectionDAG &DAG,
+  const TargetLowering &TLI, unsigned 
Width,
+  EVT WidenVT, unsigned Align,
+  unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+  TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
+ "Must be scalable");
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
+ "Expected equivalent element types");
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+
+  // Find the vector type that can load from.
+  std::optional<EVT> FirstVT =
+  findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
+  WidthDiff.getKnownMinValue());
+
+  if (!FirstVT)
+return SDValue();
+
+  SmallVector MemVTs;
+  TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+  SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
+   Chain, BasePtr, LD->getMemOperand());
+
+  // Load the element with one instruction.
+  SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
+ FirstVTWidth, dl, DAG);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new

[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120598

>From c8fe66e31bdd3bbf5beeb6096e8e4ddaba12dcf6 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 11:19:39 -0500
Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load

Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size.

commit-id:2894ccd1
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  | 97 ++-
 llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++
 3 files changed, 153 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 89ea7ef4dbe89..bdfa5f7741ad3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8eee7a4c61fe6..f88b4d5693979 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, 
unsigned ResNo) {
 break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+break;
   case ISD::LOAD:  Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -6014,6 +6017,74 @@ SDValue 
DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  N->getOperand(1), N->getOperand(2));
 }
 
+/// Either return the same load or provide appropriate casts
+/// from the load and return that.
+static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+ TypeSize LdWidth, TypeSize FirstVTWidth,
+ SDLoc dl, SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+unsigned NumElts =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  }
+  assert(FirstVT == WidenVT);
+  return LdOp;
+}
+
+static std::optional<EVT> findMemType(SelectionDAG &DAG,
+  const TargetLowering &TLI, unsigned 
Width,
+  EVT WidenVT, unsigned Align,
+  unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+  TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
+ "Must be scalable");
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
+ "Expected equivalent element types");
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+
+  // Find the vector type that can load from.
+  std::optional<EVT> FirstVT =
+  findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
+  WidthDiff.getKnownMinValue());
+
+  if (!FirstVT)
+return SDValue();
+
+  SmallVector MemVTs;
+  TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+  SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
+   Chain, BasePtr, LD->getMemOperand());
+
+  // Load the element with one instruction.
+  SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
+ FirstVTWidth, dl, DAG);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new

[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120640

>From eda6b72faabf12c032b87358f75e1627fe10e221 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 16:25:55 -0500
Subject: [PATCH] [SelectionDAG] Split vector types for atomic load

Vector types that aren't widened are split
so that a single ATOMIC_LOAD is issued for the entire vector at once.
This change utilizes the load vectorization infrastructure in
SelectionDAG in order to group the vectors. This enables SelectionDAG
to translate vectors with type bfloat,half.

commit-id:3a045357
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  37 
 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++
 3 files changed, 209 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bdfa5f7741ad3..d8f402f529632 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f88b4d5693979..a3b30943c8e7d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SplitVecRes_STEP_VECTOR(N, Lo, Hi);
 break;
   case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::ATOMIC_LOAD:
+SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi);
+break;
   case ISD::LOAD:
 SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
 break;
@@ -1421,6 +1424,40 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SetSplitVector(SDValue(N, ResNo), Lo, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
+   SDValue &Hi) {
+  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ "Extended load during type legalization!");
+  SDLoc dl(LD);
+  EVT VT = LD->getValueType(0);
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+  SDValue Ch = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+
+  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+  EVT MemIntVT =
+  EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits());
+  SDValue ALD = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, MemIntVT, IntVT, Ch,
+  Ptr, LD->getMemOperand());
+
+  EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+  EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+  SDValue ExtractLo = DAG.getNode(ISD::TRUNCATE, dl, LoIntVT, ALD);
+  SDValue ExtractHi =
+  DAG.getNode(ISD::SRL, dl, IntVT, ALD,
+  DAG.getIntPtrConstant(VT.getSizeInBits() / 2, dl));
+  ExtractHi = DAG.getNode(ISD::TRUNCATE, dl, HiIntVT, ExtractHi);
+
+  Lo = DAG.getBitcast(LoVT, ExtractLo);
+  Hi = DAG.getBitcast(HiVT, ExtractHi);
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1));
+}
+
 void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
 MachinePointerInfo &MPI, SDValue &Ptr,
 uint64_t *ScaledOffset) {
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3cf9e3c1a8dfa..6e2e9d4b21891 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -205,6 +205,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
   ret <2 x float> %ret
 }
 
+define <2 x half> @atomic_vec2_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec2_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:shrl $16, %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm1
+; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movl (%rdi), %eax
+; CHECK0-NEXT:movl %eax, %ecx
+; CHECK0-NEXT:shrl

[llvm-branch-commits] [clang] release/20.x: [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930) (PR #138017)

2025-05-10 Thread via llvm-branch-commits


github-actions[bot] wrote:

@tstellar (or anyone else). If you would like to add a note about this fix in 
the release notes (completely optional). Please reply to this comment with a 
one or two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/138017
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] release/20.x: [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930) (PR #138017)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/138017
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [mlir] [mlir][MemRef] Use specialized index ops to fold expand/collapse_shape (PR #138930)

2025-05-10 Thread via llvm-branch-commits


https://github.com/MaheshRavishankar approved this pull request.

The changes look good to me. It isn't strictly required, but given that both 
of us work on the same downstream project, does this pass with the said 
downstream project? But this looks good to me.

https://github.com/llvm/llvm-project/pull/138930
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson created 
https://github.com/llvm/llvm-project/pull/139413

None


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Alexander Richardson (arichardson)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/139413.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp (+17) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll 
(+1-4) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index eb768ed9ad5a1..e5c8df0b162d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1361,6 +1361,7 @@ class SplitPtrStructs : public 
InstVisitor<SplitPtrStructs, PtrParts> {
   PtrParts visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI);
   PtrParts visitGetElementPtrInst(GetElementPtrInst &GEP);
 
+  PtrParts visitPtrToAddrInst(PtrToAddrInst &PA);
   PtrParts visitPtrToIntInst(PtrToIntInst &PI);
   PtrParts visitIntToPtrInst(IntToPtrInst &IP);
   PtrParts visitAddrSpaceCastInst(AddrSpaceCastInst &I);
@@ -1952,6 +1953,22 @@ PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst 
&PI) {
   return {nullptr, nullptr};
 }
 
+PtrParts SplitPtrStructs::visitPtrToAddrInst(PtrToAddrInst &PA) {
+  Value *Ptr = PA.getPointerOperand();
+  if (!isSplitFatPtr(Ptr->getType()))
+return {nullptr, nullptr};
+  IRB.SetInsertPoint(&PA);
+
+  auto [Rsrc, Off] = getPtrParts(Ptr);
+  Value *Res = IRB.CreateIntCast(Off, PA.getType(), /*isSigned=*/false,
+ PA.getName() + ".off");
+  copyMetadata(Res, &PA);
+  Res->takeName(&PA);
+  SplitUsers.insert(&PA);
+  PA.replaceAllUsesWith(Res);
+  return {nullptr, nullptr};
+}
+
 PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) {
   if (!isSplitFatPtr(IP.getType()))
 return {nullptr, nullptr};
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll 
b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
index 886f2238ac5ab..074c3cf7f3bbf 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
@@ -284,10 +284,7 @@ define i160 @ptrtoaddr_ext(ptr addrspace(7) %ptr) {
 ; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:[[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } 
[[PTR]], 0
 ; CHECK-NEXT:[[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } 
[[PTR]], 1
-; CHECK-NEXT:[[RET_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_RSRC]] to 
i160
-; CHECK-NEXT:[[TMP1:%.*]] = shl nuw i160 [[RET_RSRC]], 32
-; CHECK-NEXT:[[RET_OFF:%.*]] = zext i32 [[PTR_OFF]] to i160
-; CHECK-NEXT:[[RET:%.*]] = or i160 [[TMP1]], [[RET_OFF]]
+; CHECK-NEXT:[[RET:%.*]] = zext i32 [[PTR_OFF]] to i160
 ; CHECK-NEXT:ret i160 [[RET]]
 ;
   %ret = ptrtoaddr ptr addrspace(7) %ptr to i160

``




https://github.com/llvm/llvm-project/pull/139413
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson updated 
https://github.com/llvm/llvm-project/pull/139413

>From a2dec95d11a68c7911eef777ad78b07aa747bef5 Mon Sep 17 00:00:00 2001
From: Alex Richardson 
Date: Sat, 10 May 2025 15:35:50 -0700
Subject: [PATCH] remove fixme

Created using spr 1.3.6-beta.1
---
 .../test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll 
b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
index 074c3cf7f3bbf..538145a11c733 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
@@ -278,7 +278,6 @@ define <2 x i32> @ptrtoaddr_vec(<2 x ptr addrspace(7)> 
%ptr) {
 }
 
 ;; Check that we extend the offset to i160 instead of reinterpreting all bits.
-;; FIXME: this is not currently correct.
 define i160 @ptrtoaddr_ext(ptr addrspace(7) %ptr) {
 ; CHECK-LABEL: define i160 @ptrtoaddr_ext
 ; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]]) #[[ATTR0]] {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] release/20.x: [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930) (PR #138017)

2025-05-10 Thread via llvm-branch-commits


https://github.com/llvmbot updated 
https://github.com/llvm/llvm-project/pull/138017



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
https://support.github.com/contact";>Contact Support —
https://githubstatus.com";>GitHub Status —
https://twitter.com/githubstatus";>@githubstatus
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)

2025-05-10 Thread via llvm-branch-commits


llvmbot wrote:




@llvm/pr-subscribers-mlir-llvm

Author: Alexander Richardson (arichardson)


Changes

Of the 128-bits of buffer descriptor only 48 bits are address bits, so
following the discussion on 
https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54,
the logical conclusion is to set the index width to 48 bits instead of
the current value of 128.

Most of the test changes are mechanical datalayout updates, but there
is one actual change: the ptrmask test now uses .i48 instead of .i128
and I had to update SelectionDAGBuilder to correctly extend the mask.


---

Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/139419.diff


34 Files Affected:

- (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) 
- (modified) clang/test/CodeGen/target-data.c (+2-2) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) 
- (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) 
- (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) 
- (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) 
- (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) 
- (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll 
(+1-1) 
- (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll 
(+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) 
- (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll 
(+1-1) 
- (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) 
- (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) 
- (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) 
- (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) 
- (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir 
(+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) 
- (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) 


``diff
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp 
b/clang/lib/Basic/Targets/AMDGPU.cpp
index c368200f3f739..056a3d6579fa5 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 =
 
 static const char *const DataLayoutStringAMDGCN =
 "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
-"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
-"32-v48:64-v96:128"
-"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
-"-ni:7:8:9";
+"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-"
+
"v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-"
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9";
 
 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
 llvm::AMDGPUAS::FLAT_ADDRESS, // Default
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 9cb00e8ee73d3..41a3f59b0fc81 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -176,12 +176,12 @@
 
 // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm 
%s \
 // RUN: | FileCheck %s -check-prefix=R600SI
-// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
+// R600SI: target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48

[llvm-branch-commits] [llvm] [IR] Introduce the `ptrtoaddr` instruction (PR #139357)

2025-05-10 Thread Alexander Richardson via llvm-branch-commits


https://github.com/arichardson updated 
https://github.com/llvm/llvm-project/pull/139357

>From 25dc175562349410f161ef0e80246301d9a7ba79 Mon Sep 17 00:00:00 2001
From: Alex Richardson 
Date: Fri, 9 May 2025 22:43:37 -0700
Subject: [PATCH] fix docs build

Created using spr 1.3.6-beta.1
---
 llvm/docs/LangRef.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2d18d0d97aaee..38be6918ff73c 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -12435,7 +12435,7 @@ Example:
 .. _i_ptrtoaddr:
 
 '``ptrtoaddr .. to``' Instruction
-
+^
 
 Syntax:
 """

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/138635

>From e5413e4fe5a5a55587785e7711fda4e3c544c1c9 Mon Sep 17 00:00:00 2001
From: jofernau_amdeng 
Date: Tue, 6 May 2025 01:48:11 -0400
Subject: [PATCH] [X86] Remove extra MOV after widening atomic load

This change adds patterns to optimize out an extra MOV
present after widening the atomic load.

commit-id:45989503
---
 llvm/lib/Target/X86/X86InstrCompiler.td|  7 
 llvm/test/CodeGen/X86/atomic-load-store.ll | 40 --
 2 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td 
b/llvm/lib/Target/X86/X86InstrCompiler.td
index efa1e8bd7f3e3..786d0567280f9 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1204,6 +1204,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), 
(MOV16rm addr:$src)>;
 def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
 def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;
 
+def : Pat<(v4i32 (scalar_to_vector (i32 (zext (i16 (atomic_load_16 
addr:$src)),
+   (MOVDI2PDIrm addr:$src)>;   // load atomic <2 x i8>
+def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src,
+   (MOVDI2PDIrm addr:$src)>;   // load atomic <2 x i16>
+def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src,
+   (MOV64toPQIrm  addr:$src)>; // load atomic <2 x i32,float>
+
 // Floating point loads/stores.
 def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
   (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 9ee8b4fc5ac7f..3cf9e3c1a8dfa 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -165,11 +165,15 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) {
 }
 
 define <2 x i16> @atomic_vec2_i16(ptr %x) {
-; CHECK-LABEL: atomic_vec2_i16:
-; CHECK:   ## %bb.0:
-; CHECK-NEXT:movl (%rdi), %eax
-; CHECK-NEXT:movd %eax, %xmm0
-; CHECK-NEXT:retq
+; CHECK3-LABEL: atomic_vec2_i16:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_i16:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK0-NEXT:retq
   %ret = load atomic <2 x i16>, ptr %x acquire, align 4
   ret <2 x i16> %ret
 }
@@ -177,8 +181,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) {
 define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
 ; CHECK-LABEL: atomic_vec2_ptr270:
 ; CHECK:   ## %bb.0:
-; CHECK-NEXT:movq (%rdi), %rax
-; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:movq (%rdi), %xmm0
 ; CHECK-NEXT:retq
   %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
   ret <2 x ptr addrspace(270)> %ret
@@ -187,8 +190,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) 
{
 define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
 ; CHECK-LABEL: atomic_vec2_i32_align:
 ; CHECK:   ## %bb.0:
-; CHECK-NEXT:movq (%rdi), %rax
-; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:movq (%rdi), %xmm0
 ; CHECK-NEXT:retq
   %ret = load atomic <2 x i32>, ptr %x acquire, align 8
   ret <2 x i32> %ret
@@ -197,8 +199,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
 define <2 x float> @atomic_vec2_float_align(ptr %x) {
 ; CHECK-LABEL: atomic_vec2_float_align:
 ; CHECK:   ## %bb.0:
-; CHECK-NEXT:movq (%rdi), %rax
-; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:movq (%rdi), %xmm0
 ; CHECK-NEXT:retq
   %ret = load atomic <2 x float>, ptr %x acquire, align 8
   ret <2 x float> %ret
@@ -354,11 +355,15 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
 }
 
 define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
-; CHECK-LABEL: atomic_vec4_i8:
-; CHECK:   ## %bb.0:
-; CHECK-NEXT:movl (%rdi), %eax
-; CHECK-NEXT:movd %eax, %xmm0
-; CHECK-NEXT:retq
+; CHECK3-LABEL: atomic_vec4_i8:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec4_i8:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK0-NEXT:retq
   %ret = load atomic <4 x i8>, ptr %x acquire, align 4
   ret <4 x i8> %ret
 }
@@ -366,8 +371,7 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
 define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
 ; CHECK-LABEL: atomic_vec4_i16:
 ; CHECK:   ## %bb.0:
-; CHECK-NEXT:movq (%rdi), %rax
-; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:movq (%rdi), %xmm0
 ; CHECK-NEXT:retq
   %ret = load atomic <4 x i16>, ptr %x acquire, align 8
   ret <4 x i16> %ret

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [SPARC] Use op-then-halve instructions when we have VIS3 (PR #135718)

2025-05-10 Thread Brad Smith via llvm-branch-commits


brad0 wrote:

@arsenm 

https://github.com/llvm/llvm-project/pull/135718
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/125432

>From bf8fc80f870022c2a42d01a500e2b16d648dd376 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 31 Jan 2025 13:12:56 -0500
Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic
 vector.

After splitting, all elements are created. The two components must
be found by looking at the upper and lower half of EXTRACT_ELEMENT.
This change extends EltsFromConsecutiveLoads
to understand AtomicSDNode so that unused elements can be removed.

commit-id:b83937a8
---
 llvm/include/llvm/CodeGen/SelectionDAG.h  |   4 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  20 ++-
 .../SelectionDAGAddressAnalysis.cpp   |  30 ++--
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  59 +--
 llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++
 5 files changed, 90 insertions(+), 172 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 87b6914f8a0ee..ab8bb517e6ae4 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1873,7 +1873,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will 
have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
@@ -2311,7 +2311,7 @@ class SelectionDAG {
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
   /// location that the 'Base' load is loading from.
-  bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
+  bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
   unsigned Bytes, int Dist) const;
 
   /// Infer alignment of a load / store address. Return std::nullopt if it
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bbf1b0fd590ef..38b22078c8c44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12215,7 +12215,7 @@ SDValue 
SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
SDValue NewMemOp) {
   assert(isa(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
@@ -12905,17 +12905,21 @@ std::pair 
SelectionDAG::UnrollVectorOverflowOp(
 getBuildVector(NewOvVT, dl, OvScalars));
 }
 
-bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
-  LoadSDNode *Base,
+bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
+  MemSDNode *Base,
   unsigned Bytes,
   int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
 return false;
-  // TODO: probably too restrictive for atomics, revisit
-  if (!LD->isSimple())
-return false;
-  if (LD->isIndexed() || Base->isIndexed())
-return false;
+  if (auto Ld = dyn_cast(LD)) {
+if (!Ld->isSimple())
+  return false;
+if (Ld->isIndexed())
+  return false;
+  }
+  if (auto Ld = dyn_cast(Base))
+if (Ld->isIndexed())
+  return false;
   if (LD->getChain() != Base->getChain())
 return false;
   EVT VT = LD->getMemoryVT();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..c29cb424c7a4c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, 
int64_t BitSize,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
-   const SelectionDAG &DAG) {
+template 
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
   bool IsIndexSignExt = false;
 
   // pre-inc/pre-dec ops are components of EA.
-  if (N->getAddressingMode() == ISD::PRE_INC) {
-if (auto *C = dyn_cast(N->getOffset()))

[llvm-branch-commits] [libcxx] release/20.x: [libcxx] Provide locale conversions to tests through lit substitution (#105651) (PR #136449)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


tstellar wrote:

Do we still want to try to backport this one?

https://github.com/llvm/llvm-project/pull/136449
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] release/20.x: [LV] Fix crash when building partial reductions using types that aren't known scale factors (#136680) (PR #136863)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


tstellar wrote:

@NickGuy-Arm You can do the changes manually and create a new PR.

https://github.com/llvm/llvm-project/pull/136863
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Remove interceptors for deprecated struct termio (#137403) (PR #137707)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


tstellar wrote:

@hpax Do you think we should backport this one anyway?

https://github.com/llvm/llvm-project/pull/137707
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] release/20.x: [RISCV] Allow `Zicsr`/`Zifencei` to duplicate with `g` (#136842) (PR #137490)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


https://github.com/tstellar updated 
https://github.com/llvm/llvm-project/pull/137490



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [release/20.x] Support z17 processor name and scheduler description (PR #135413)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


https://github.com/tstellar updated 
https://github.com/llvm/llvm-project/pull/135413



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [release/20.x] Support z17 processor name and scheduler description (PR #135413)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/135413
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] [llvm] [release/20.x] Support z17 processor name and scheduler description (PR #135413)

2025-05-10 Thread via llvm-branch-commits


github-actions[bot] wrote:

@uweigand (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix. When you are done, please add the 
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/135413
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] 4370072 - [clang] Forward TPL of NestedNameSpecifier

2025-05-10 Thread Tom Stellard via llvm-branch-commits


Author: Jonas Hahnfeld
Date: 2025-05-10T10:09:45-07:00
New Revision: 4370072022e5265d51b64182608e133277a24ac0

URL: 
https://github.com/llvm/llvm-project/commit/4370072022e5265d51b64182608e133277a24ac0
DIFF: 
https://github.com/llvm/llvm-project/commit/4370072022e5265d51b64182608e133277a24ac0.diff

LOG: [clang] Forward TPL of NestedNameSpecifier

This avoids type suffixes for integer constants when the type can be
inferred from the template parameter, such as the unsigned parameter
of A<1> and A<2> in the added test.

Added: 


Modified: 
clang/lib/AST/NestedNameSpecifier.cpp
clang/unittests/Tooling/QualTypeNamesTest.cpp

Removed: 




diff  --git a/clang/lib/AST/NestedNameSpecifier.cpp 
b/clang/lib/AST/NestedNameSpecifier.cpp
index 76c77569da9fd..c043996f1ada3 100644
--- a/clang/lib/AST/NestedNameSpecifier.cpp
+++ b/clang/lib/AST/NestedNameSpecifier.cpp
@@ -283,13 +283,16 @@ void NestedNameSpecifier::print(raw_ostream &OS, const 
PrintingPolicy &Policy,
   case TypeSpec: {
 const auto *Record =
 
dyn_cast_or_null(getAsRecordDecl());
-if (ResolveTemplateArguments && Record) {
+const TemplateParameterList *TPL = nullptr;
+if (Record) {
+  TPL = Record->getSpecializedTemplate()->getTemplateParameters();
+  if (ResolveTemplateArguments) {
 // Print the type trait with resolved template parameters.
 Record->printName(OS, Policy);
-printTemplateArgumentList(
-OS, Record->getTemplateArgs().asArray(), Policy,
-Record->getSpecializedTemplate()->getTemplateParameters());
+printTemplateArgumentList(OS, Record->getTemplateArgs().asArray(),
+  Policy, TPL);
 break;
+  }
 }
 const Type *T = getAsType();
 
@@ -313,8 +316,8 @@ void NestedNameSpecifier::print(raw_ostream &OS, const 
PrintingPolicy &Policy,
 TemplateName::Qualified::None);
 
   // Print the template argument list.
-  printTemplateArgumentList(OS, SpecType->template_arguments(),
-InnerPolicy);
+  printTemplateArgumentList(OS, SpecType->template_arguments(), 
InnerPolicy,
+TPL);
 } else if (const auto *DepSpecType =
dyn_cast(T)) {
   // Print the template name without its corresponding
@@ -322,7 +325,7 @@ void NestedNameSpecifier::print(raw_ostream &OS, const 
PrintingPolicy &Policy,
   OS << DepSpecType->getIdentifier()->getName();
   // Print the template argument list.
   printTemplateArgumentList(OS, DepSpecType->template_arguments(),
-InnerPolicy);
+InnerPolicy, TPL);
 } else {
   // Print the type normally
   QualType(T, 0).print(OS, InnerPolicy);

diff  --git a/clang/unittests/Tooling/QualTypeNamesTest.cpp 
b/clang/unittests/Tooling/QualTypeNamesTest.cpp
index 5ded64d4fcc8c..49c40d633ad4b 100644
--- a/clang/unittests/Tooling/QualTypeNamesTest.cpp
+++ b/clang/unittests/Tooling/QualTypeNamesTest.cpp
@@ -265,6 +265,102 @@ TEST(QualTypeNameTest, InlineNamespace) {
   TypeNameVisitor::Lang_CXX11);
 }
 
+TEST(QualTypeNameTest, TemplatedClass) {
+  std::unique_ptr AST =
+  tooling::buildASTFromCode("template  struct A {\n"
+"  template  struct B {};\n"
+"};\n"
+"template struct A<1>;\n"
+"template struct A<2u>;\n"
+"template struct A<1>::B<3>;\n"
+"template struct A<2u>::B<4u>;\n");
+
+  auto &Context = AST->getASTContext();
+  auto &Policy = Context.getPrintingPolicy();
+  auto getFullyQualifiedName = [&](QualType QT) {
+return TypeName::getFullyQualifiedName(QT, Context, Policy);
+  };
+
+  auto *A = Context.getTranslationUnitDecl()
+->lookup(&Context.Idents.get("A"))
+.find_first();
+  ASSERT_NE(A, nullptr);
+
+  // A has two explicit instantiations: A<1> and A<2u>
+  auto ASpec = A->spec_begin();
+  ASSERT_NE(ASpec, A->spec_end());
+  auto *A1 = *ASpec;
+  ASpec++;
+  ASSERT_NE(ASpec, A->spec_end());
+  auto *A2 = *ASpec;
+
+  // Their type names follow the records.
+  QualType A1RecordTy = Context.getRecordType(A1);
+  EXPECT_EQ(getFullyQualifiedName(A1RecordTy), "A<1>");
+  QualType A2RecordTy = Context.getRecordType(A2);
+  EXPECT_EQ(getFullyQualifiedName(A2RecordTy), "A<2U>");
+
+  // getTemplateSpecializationType() gives types that print the integral
+  // argument directly.
+  TemplateArgument Args1[] = {
+  {Context, llvm::APSInt::getUnsigned(1u), Context.UnsignedIntTy}};
+  QualType A1TemplateSpecTy =
+  Context.getTemplateSpecializationType(TemplateName(A), Args1, 
A1RecordTy);
+  EXPECT_EQ(A1TemplateSpecTy.getAsString(),

[llvm-branch-commits] [clang] release/20.x: [clang] Forward TPL of NestedNameSpecifier (PR #137806)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


https://github.com/tstellar closed 
https://github.com/llvm/llvm-project/pull/137806
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [clang] release/20.x: [clang] Forward TPL of NestedNameSpecifier (PR #137806)

2025-05-10 Thread Tom Stellard via llvm-branch-commits


https://github.com/tstellar updated 
https://github.com/llvm/llvm-project/pull/137806



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120387

>From d02434d4ca3ec809d7a72862408bb569f4043939 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:40:32 -0500
Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes.

Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.

commit-id:a06a5cc6
---
 llvm/test/CodeGen/X86/atomic-load-store.ll | 253 +
 1 file changed, 253 insertions(+)

diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 6efcbb80c0ce6..39e9fdfa5e62b 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   ret <1 x i64> %ret
 }
 
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_ptr:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_ptr:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
+
 define <1 x half> @atomic_vec1_half(ptr %x) {
 ; CHECK3-LABEL: atomic_vec1_half:
 ; CHECK3:   ## %bb.0:
@@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) 
nounwind {
   %ret = load atomic <1 x double>, ptr %x acquire, align 8
   ret <1 x double> %ret
 }
+
+define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_i64:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i64:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+  ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_double:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_double:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_i32:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_i32:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+  ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_float_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[

[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)

2025-05-10 Thread via llvm-branch-commits


github-actions[bot] wrote:

@anutosh491 (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix. When you are done, please add the 
release:note label to this PR.

https://github.com/llvm/llvm-project/pull/137620
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [BOLT][heatmap] Compute section utilization and partition score (PR #139193)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov edited 
https://github.com/llvm/llvm-project/pull/139193
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [BOLT][NFC] Disambiguate sample as basic/IP sample (PR #139350)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/139350

>From d32ce3f42af3da7ced12b0cc6b58e120deb83566 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Fri, 9 May 2025 21:13:27 -0700
Subject: [PATCH] rename data members

Created using spr 1.3.4
---
 bolt/include/bolt/Profile/DataReader.h |  6 +++---
 bolt/lib/Profile/DataAggregator.cpp| 11 ++-
 bolt/lib/Profile/DataReader.cpp| 21 +++--
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/bolt/include/bolt/Profile/DataReader.h 
b/bolt/include/bolt/Profile/DataReader.h
index b91efca085c8c..fbd1dd1e68792 100644
--- a/bolt/include/bolt/Profile/DataReader.h
+++ b/bolt/include/bolt/Profile/DataReader.h
@@ -390,7 +390,7 @@ class DataReader : public ProfileReaderBase {
   FuncMemData *getMemDataForNames(const std::vector &FuncNames);
 
   FuncBasicSampleData *
-  getFuncSampleData(const std::vector &FuncNames);
+  getFuncBasicSampleData(const std::vector &FuncNames);
 
   /// Return a vector of all FuncBranchData matching the list of names.
   /// Internally use fuzzy matching to match special names like LTO-generated
@@ -433,7 +433,7 @@ class DataReader : public ProfileReaderBase {
   }
 
   using NamesToBranchesMapTy = std::map;
-  using NamesToSamplesMapTy = std::map;
+  using NamesToBasicSamplesMapTy = std::map;
   using NamesToMemEventsMapTy = std::map;
   using FuncsToBranchesMapTy =
   std::unordered_map;
@@ -496,7 +496,7 @@ class DataReader : public ProfileReaderBase {
   unsigned Line{0};
   unsigned Col{0};
   NamesToBranchesMapTy NamesToBranches;
-  NamesToSamplesMapTy NamesToSamples;
+  NamesToBasicSamplesMapTy NamesToBasicSamples;
   NamesToMemEventsMapTy NamesToMemEvents;
   FuncsToBranchesMapTy FuncsToBranches;
   FuncsToMemDataMapTy FuncsToMemData;
diff --git a/bolt/lib/Profile/DataAggregator.cpp 
b/bolt/lib/Profile/DataAggregator.cpp
index 7a85297fe5f0e..88dea2cef4476 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -572,7 +572,8 @@ void DataAggregator::processProfile(BinaryContext &BC) {
 if (FuncBranchData *FBD = getBranchData(BF)) {
   BF.markProfiled(BinaryFunction::PF_LBR);
   BF.RawSampleCount = FBD->getNumExecutedBranches();
-} else if (FuncBasicSampleData *FSD = getFuncSampleData(BF.getNames())) {
+} else if (FuncBasicSampleData *FSD =
+   getFuncBasicSampleData(BF.getNames())) {
   BF.markProfiled(BinaryFunction::PF_IP);
   BF.RawSampleCount = FSD->getSamples();
 }
@@ -644,11 +645,11 @@ bool DataAggregator::doBasicSample(BinaryFunction 
&OrigFunc, uint64_t Address,
   // Attach executed bytes to parent function in case of cold fragment.
   Func.SampleCountInBytes += Count * BlockSize;
 
-  auto I = NamesToSamples.find(Func.getOneName());
-  if (I == NamesToSamples.end()) {
+  auto I = NamesToBasicSamples.find(Func.getOneName());
+  if (I == NamesToBasicSamples.end()) {
 bool Success;
 StringRef LocName = getLocationName(Func, BAT);
-std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
+std::tie(I, Success) = NamesToBasicSamples.insert(std::make_pair(
 Func.getOneName(),
 FuncBasicSampleData(LocName, FuncBasicSampleData::ContainerTy())));
   }
@@ -2194,7 +2195,7 @@ DataAggregator::writeAggregatedFile(StringRef 
OutputFilename) const {
   OutFile << " " << Entry.getKey();
 OutFile << "\n";
 
-for (const auto &KV : NamesToSamples) {
+for (const auto &KV : NamesToBasicSamples) {
   const FuncBasicSampleData &FSD = KV.second;
   for (const BasicSampleInfo &SI : FSD.Data) {
 writeLocation(SI.Loc);
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index fda62e8c073ea..3376bef9d3fd7 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -562,7 +562,7 @@ float DataReader::evaluateProfileData(BinaryFunction &BF,
 }
 
 void DataReader::readBasicSampleData(BinaryFunction &BF) {
-  FuncBasicSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
+  FuncBasicSampleData *SampleDataOrErr = getFuncBasicSampleData(BF.getNames());
   if (!SampleDataOrErr)
 return;
 
@@ -1090,10 +1090,10 @@ bool DataReader::hasMemData() {
 
 std::error_code DataReader::parseInNoLBRMode() {
   auto GetOrCreateFuncEntry = [&](StringRef Name) {
-auto I = NamesToSamples.find(Name);
-if (I == NamesToSamples.end()) {
+auto I = NamesToBasicSamples.find(Name);
+if (I == NamesToBasicSamples.end()) {
   bool Success;
-  std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
+  std::tie(I, Success) = NamesToBasicSamples.insert(std::make_pair(
   Name, FuncBasicSampleData(Name, 
FuncBasicSampleData::ContainerTy())));
 
   assert(Success && "unexpected result of insert");
@@ -1142,8 +1142,8 @@ std::error_code DataReader::parseInNoLBRMode() {
 I->second.Data.emplace_back(std::move(MI));
   }
 
-  for (auto &FuncSamples : Nam

[llvm-branch-commits] [llvm] [BOLT][NFC] Disambiguate sample as basic/IP sample (PR #139350)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/139350

>From d32ce3f42af3da7ced12b0cc6b58e120deb83566 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Fri, 9 May 2025 21:13:27 -0700
Subject: [PATCH] rename data members

Created using spr 1.3.4
---
 bolt/include/bolt/Profile/DataReader.h |  6 +++---
 bolt/lib/Profile/DataAggregator.cpp| 11 ++-
 bolt/lib/Profile/DataReader.cpp| 21 +++--
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/bolt/include/bolt/Profile/DataReader.h 
b/bolt/include/bolt/Profile/DataReader.h
index b91efca085c8c..fbd1dd1e68792 100644
--- a/bolt/include/bolt/Profile/DataReader.h
+++ b/bolt/include/bolt/Profile/DataReader.h
@@ -390,7 +390,7 @@ class DataReader : public ProfileReaderBase {
   FuncMemData *getMemDataForNames(const std::vector &FuncNames);
 
   FuncBasicSampleData *
-  getFuncSampleData(const std::vector &FuncNames);
+  getFuncBasicSampleData(const std::vector &FuncNames);
 
   /// Return a vector of all FuncBranchData matching the list of names.
   /// Internally use fuzzy matching to match special names like LTO-generated
@@ -433,7 +433,7 @@ class DataReader : public ProfileReaderBase {
   }
 
   using NamesToBranchesMapTy = std::map;
-  using NamesToSamplesMapTy = std::map;
+  using NamesToBasicSamplesMapTy = std::map;
   using NamesToMemEventsMapTy = std::map;
   using FuncsToBranchesMapTy =
   std::unordered_map;
@@ -496,7 +496,7 @@ class DataReader : public ProfileReaderBase {
   unsigned Line{0};
   unsigned Col{0};
   NamesToBranchesMapTy NamesToBranches;
-  NamesToSamplesMapTy NamesToSamples;
+  NamesToBasicSamplesMapTy NamesToBasicSamples;
   NamesToMemEventsMapTy NamesToMemEvents;
   FuncsToBranchesMapTy FuncsToBranches;
   FuncsToMemDataMapTy FuncsToMemData;
diff --git a/bolt/lib/Profile/DataAggregator.cpp 
b/bolt/lib/Profile/DataAggregator.cpp
index 7a85297fe5f0e..88dea2cef4476 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -572,7 +572,8 @@ void DataAggregator::processProfile(BinaryContext &BC) {
 if (FuncBranchData *FBD = getBranchData(BF)) {
   BF.markProfiled(BinaryFunction::PF_LBR);
   BF.RawSampleCount = FBD->getNumExecutedBranches();
-} else if (FuncBasicSampleData *FSD = getFuncSampleData(BF.getNames())) {
+} else if (FuncBasicSampleData *FSD =
+   getFuncBasicSampleData(BF.getNames())) {
   BF.markProfiled(BinaryFunction::PF_IP);
   BF.RawSampleCount = FSD->getSamples();
 }
@@ -644,11 +645,11 @@ bool DataAggregator::doBasicSample(BinaryFunction 
&OrigFunc, uint64_t Address,
   // Attach executed bytes to parent function in case of cold fragment.
   Func.SampleCountInBytes += Count * BlockSize;
 
-  auto I = NamesToSamples.find(Func.getOneName());
-  if (I == NamesToSamples.end()) {
+  auto I = NamesToBasicSamples.find(Func.getOneName());
+  if (I == NamesToBasicSamples.end()) {
 bool Success;
 StringRef LocName = getLocationName(Func, BAT);
-std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
+std::tie(I, Success) = NamesToBasicSamples.insert(std::make_pair(
 Func.getOneName(),
 FuncBasicSampleData(LocName, FuncBasicSampleData::ContainerTy())));
   }
@@ -2194,7 +2195,7 @@ DataAggregator::writeAggregatedFile(StringRef 
OutputFilename) const {
   OutFile << " " << Entry.getKey();
 OutFile << "\n";
 
-for (const auto &KV : NamesToSamples) {
+for (const auto &KV : NamesToBasicSamples) {
   const FuncBasicSampleData &FSD = KV.second;
   for (const BasicSampleInfo &SI : FSD.Data) {
 writeLocation(SI.Loc);
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index fda62e8c073ea..3376bef9d3fd7 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -562,7 +562,7 @@ float DataReader::evaluateProfileData(BinaryFunction &BF,
 }
 
 void DataReader::readBasicSampleData(BinaryFunction &BF) {
-  FuncBasicSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
+  FuncBasicSampleData *SampleDataOrErr = getFuncBasicSampleData(BF.getNames());
   if (!SampleDataOrErr)
 return;
 
@@ -1090,10 +1090,10 @@ bool DataReader::hasMemData() {
 
 std::error_code DataReader::parseInNoLBRMode() {
   auto GetOrCreateFuncEntry = [&](StringRef Name) {
-auto I = NamesToSamples.find(Name);
-if (I == NamesToSamples.end()) {
+auto I = NamesToBasicSamples.find(Name);
+if (I == NamesToBasicSamples.end()) {
   bool Success;
-  std::tie(I, Success) = NamesToSamples.insert(std::make_pair(
+  std::tie(I, Success) = NamesToBasicSamples.insert(std::make_pair(
   Name, FuncBasicSampleData(Name, 
FuncBasicSampleData::ContainerTy())));
 
   assert(Success && "unexpected result of insert");
@@ -1142,8 +1142,8 @@ std::error_code DataReader::parseInNoLBRMode() {
 I->second.Data.emplace_back(std::move(MI));
   }
 
-  for (auto &FuncSamples : Nam

[llvm-branch-commits] [clang] [clang-tools-extra] [llvm] [BOLT][heatmap] Use parsed basic/branch events (PR #136531)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/136531



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #666666;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [BOLT] Build heatmap with pre-aggregated data (PR #138798)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/138798



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #666666;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [BOLT][heatmap] Compute section utilization and partition score (PR #139193)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/139193

>From 0d16d90e829f7ce753abb5e50eca8e46a80872dc Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Fri, 9 May 2025 14:48:04 -0700
Subject: [PATCH] update

Created using spr 1.3.4
---
 bolt/lib/Profile/Heatmap.cpp  | 35 ++-
 bolt/test/X86/heatmap-preagg.test | 24 ++---
 2 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp
index dd73f3a13a9d3..7b74c44427cb8 100644
--- a/bolt/lib/Profile/Heatmap.cpp
+++ b/bolt/lib/Profile/Heatmap.cpp
@@ -297,7 +297,7 @@ void Heatmap::printSectionHotness(StringRef FileName) const 
{
 void Heatmap::printSectionHotness(raw_ostream &OS) const {
   uint64_t NumTotalCounts = 0;
   StringMap SectionHotness;
-  StringMap SectionUtilization;
+  StringMap BucketUtilization;
   unsigned TextSectionIndex = 0;
 
   if (TextSections.empty())
@@ -312,42 +312,39 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const {
 UnmappedHotness += Frequency;
   };
 
-  for (const auto [Bucket, Count] : Map) {
-NumTotalCounts += Count;
+  for (const std::pair &KV : Map) {
+NumTotalCounts += KV.second;
 // We map an address bucket to the first section (lowest address)
 // overlapping with that bucket.
-auto Address = Bucket * BucketSize;
+auto Address = KV.first * BucketSize;
 while (TextSectionIndex < TextSections.size() &&
Address >= TextSections[TextSectionIndex].EndAddress)
   TextSectionIndex++;
 if (TextSectionIndex >= TextSections.size() ||
 Address + BucketSize < TextSections[TextSectionIndex].BeginAddress) {
-  RecordUnmappedBucket(Address, Count);
+  RecordUnmappedBucket(Address, KV.second);
   continue;
 }
-StringRef Name = TextSections[TextSectionIndex].Name;
-SectionHotness[Name] += Count;
-++SectionUtilization[Name];
+SectionHotness[TextSections[TextSectionIndex].Name] += KV.second;
+++BucketUtilization[TextSections[TextSectionIndex].Name];
   }
 
-  auto getNumBuckets = [&](uint64_t Begin, uint64_t End) {
-return End / BucketSize + !!(End % BucketSize) - Begin / BucketSize;
-  };
-
   assert(NumTotalCounts > 0 &&
  "total number of heatmap buckets should be greater than 0");
 
   OS << "Section Name, Begin Address, End Address, Percentage Hotness, "
- << "Utilization Pct\n";
+ << "Utilization Pct, Partition Score\n";
   for (const auto [Name, Begin, End] : TextSections) {
-const float RelHotness = 100. * SectionHotness[Name] / NumTotalCounts;
-const float BucketUtilization =
-100. * SectionUtilization[Name] / getNumBuckets(Begin, End);
-OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}\n", Name, Begin, End,
-  RelHotness, BucketUtilization);
+const float Hotness = 1. * SectionHotness[Name] / NumTotalCounts;
+const uint64_t NumBuckets =
+End / BucketSize + !!(End % BucketSize) - Begin / BucketSize;
+const float Utilization = 1. * BucketUtilization[Name] / NumBuckets;
+const float PartitionScore = Hotness * Utilization;
+OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}, {5:f4}\n", Name, Begin,
+  End, 100. * Hotness, 100. * Utilization, PartitionScore);
   }
   if (UnmappedHotness > 0)
-OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0\n",
+OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0, 0\n",
   100.0 * UnmappedHotness / NumTotalCounts);
 }
 } // namespace bolt
diff --git a/bolt/test/X86/heatmap-preagg.test 
b/bolt/test/X86/heatmap-preagg.test
index 660d37fd03cbe..48f1683892881 100644
--- a/bolt/test/X86/heatmap-preagg.test
+++ b/bolt/test/X86/heatmap-preagg.test
@@ -17,19 +17,19 @@ RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT 
--input-file %t2-section-hotn
 CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries
 CHECK-HEATMAP: HEATMAP: invalid traces: 1
 
-CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, 
Utilization Pct
-CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100.0000
-CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667
-CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064
-CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000
+CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, 
Utilization Pct, Partition Score
+CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100.0000, 0.1685
+CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667, 0.0317
+CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064, 0.6671
+CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000, 0.0000
 
 CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries
 CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2
 
-CHECK-SEC-HOT-BAT: Section Name, Begin Address, End Address, Percentage 
Hotness, Utilization Pct
-CHECK-SEC-HOT-BAT-NEXT: .init, 0x401000, 0x40101b, 17.2888, 10

[llvm-branch-commits] [llvm] [BOLT][heatmap] Compute section utilization and partition score (PR #139193)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/139193

>From 0d16d90e829f7ce753abb5e50eca8e46a80872dc Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Fri, 9 May 2025 14:48:04 -0700
Subject: [PATCH] update

Created using spr 1.3.4
---
 bolt/lib/Profile/Heatmap.cpp  | 35 ++-
 bolt/test/X86/heatmap-preagg.test | 24 ++---
 2 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp
index dd73f3a13a9d3..7b74c44427cb8 100644
--- a/bolt/lib/Profile/Heatmap.cpp
+++ b/bolt/lib/Profile/Heatmap.cpp
@@ -297,7 +297,7 @@ void Heatmap::printSectionHotness(StringRef FileName) const 
{
 void Heatmap::printSectionHotness(raw_ostream &OS) const {
   uint64_t NumTotalCounts = 0;
   StringMap SectionHotness;
-  StringMap SectionUtilization;
+  StringMap BucketUtilization;
   unsigned TextSectionIndex = 0;
 
   if (TextSections.empty())
@@ -312,42 +312,39 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const {
 UnmappedHotness += Frequency;
   };
 
-  for (const auto [Bucket, Count] : Map) {
-NumTotalCounts += Count;
+  for (const std::pair &KV : Map) {
+NumTotalCounts += KV.second;
 // We map an address bucket to the first section (lowest address)
 // overlapping with that bucket.
-auto Address = Bucket * BucketSize;
+auto Address = KV.first * BucketSize;
 while (TextSectionIndex < TextSections.size() &&
Address >= TextSections[TextSectionIndex].EndAddress)
   TextSectionIndex++;
 if (TextSectionIndex >= TextSections.size() ||
 Address + BucketSize < TextSections[TextSectionIndex].BeginAddress) {
-  RecordUnmappedBucket(Address, Count);
+  RecordUnmappedBucket(Address, KV.second);
   continue;
 }
-StringRef Name = TextSections[TextSectionIndex].Name;
-SectionHotness[Name] += Count;
-++SectionUtilization[Name];
+SectionHotness[TextSections[TextSectionIndex].Name] += KV.second;
+++BucketUtilization[TextSections[TextSectionIndex].Name];
   }
 
-  auto getNumBuckets = [&](uint64_t Begin, uint64_t End) {
-return End / BucketSize + !!(End % BucketSize) - Begin / BucketSize;
-  };
-
   assert(NumTotalCounts > 0 &&
  "total number of heatmap buckets should be greater than 0");
 
   OS << "Section Name, Begin Address, End Address, Percentage Hotness, "
- << "Utilization Pct\n";
+ << "Utilization Pct, Partition Score\n";
   for (const auto [Name, Begin, End] : TextSections) {
-const float RelHotness = 100. * SectionHotness[Name] / NumTotalCounts;
-const float BucketUtilization =
-100. * SectionUtilization[Name] / getNumBuckets(Begin, End);
-OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}\n", Name, Begin, End,
-  RelHotness, BucketUtilization);
+const float Hotness = 1. * SectionHotness[Name] / NumTotalCounts;
+const uint64_t NumBuckets =
+End / BucketSize + !!(End % BucketSize) - Begin / BucketSize;
+const float Utilization = 1. * BucketUtilization[Name] / NumBuckets;
+const float PartitionScore = Hotness * Utilization;
+OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}, {5:f4}\n", Name, Begin,
+  End, 100. * Hotness, 100. * Utilization, PartitionScore);
   }
   if (UnmappedHotness > 0)
-OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0\n",
+OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0, 0\n",
   100.0 * UnmappedHotness / NumTotalCounts);
 }
 } // namespace bolt
diff --git a/bolt/test/X86/heatmap-preagg.test 
b/bolt/test/X86/heatmap-preagg.test
index 660d37fd03cbe..48f1683892881 100644
--- a/bolt/test/X86/heatmap-preagg.test
+++ b/bolt/test/X86/heatmap-preagg.test
@@ -17,19 +17,19 @@ RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT 
--input-file %t2-section-hotn
 CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries
 CHECK-HEATMAP: HEATMAP: invalid traces: 1
 
-CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, 
Utilization Pct
-CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100.0000
-CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667
-CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064
-CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000
+CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, 
Utilization Pct, Partition Score
+CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100.0000, 0.1685
+CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667, 0.0317
+CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064, 0.6671
+CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0.0000, 0.0000, 0.0000
 
 CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries
 CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2
 
-CHECK-SEC-HOT-BAT: Section Name, Begin Address, End Address, Percentage 
Hotness, Utilization Pct
-CHECK-SEC-HOT-BAT-NEXT: .init, 0x401000, 0x40101b, 17.2888, 10

[llvm-branch-commits] [BOLT] Print heatmap section scores in perf2bolt (PR #139194)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/139194



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #666666;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [BOLT] Build heatmap with pre-aggregated data (PR #138798)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/138798



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #666666;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [BOLT] Drop perf2bolt cold samples diagnostic (PR #139337)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/139337



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #666666;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [BOLT] Drop perf2bolt cold samples diagnostic (PR #139337)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/139337



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #666666;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [BOLT] Print heatmap section scores in perf2bolt (PR #139194)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/139194



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #666666;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [BOLT] Build heatmap with pre-aggregated data (PR #138798)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/138798

>From f6b275f682c598d5c026efcbd348c6e8a35c759b Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Tue, 6 May 2025 20:09:58 -0700
Subject: [PATCH 1/2] keep parsing build-id

Created using spr 1.3.4
---
 bolt/lib/Profile/DataAggregator.cpp | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp 
b/bolt/lib/Profile/DataAggregator.cpp
index c5b9696dc79d0..a5ac87ee781b2 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -450,6 +450,14 @@ int DataAggregator::prepareToParse(StringRef Name, 
PerfProcessInfo &Process,
 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   this->BC = &BC;
 
+  if (std::optional FileBuildID = BC.getFileBuildID()) {
+outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
+processFileBuildID(*FileBuildID);
+  } else {
+errs() << "BOLT-WARNING: build-id will not be checked because we could "
+  "not read one from input binary\n";
+  }
+
   auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
 exit(1);
@@ -468,14 +476,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) 
{
 goto heatmap;
   }
 
-  if (std::optional FileBuildID = BC.getFileBuildID()) {
-outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
-processFileBuildID(*FileBuildID);
-  } else {
-errs() << "BOLT-WARNING: build-id will not be checked because we could "
-  "not read one from input binary\n";
-  }
-
   if (BC.IsLinuxKernel) {
 // Current MMap parsing logic does not work with linux kernel.
 // MMap entries for linux kernel uses PERF_RECORD_MMAP

>From 47f76b8dff0f639cac4b205de86224bfa50aa430 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Fri, 9 May 2025 15:01:28 -0700
Subject: [PATCH 2/2] simplify preprocessProfile

Created using spr 1.3.4
---
 bolt/lib/Profile/DataAggregator.cpp | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp 
b/bolt/lib/Profile/DataAggregator.cpp
index c5b9696dc79d0..a259292bd0a29 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -518,13 +518,12 @@ Error DataAggregator::preprocessProfile(BinaryContext 
&BC) {
   deleteTempFiles();
 
 heatmap:
-  if (opts::HeatmapMode) {
-if (std::error_code EC = printLBRHeatMap())
-  return errorCodeToError(EC);
-exit(0);
-  }
+  if (!opts::HeatmapMode)
+return Error::success();
 
-  return Error::success();
+  if (std::error_code EC = printLBRHeatMap())
+return errorCodeToError(EC);
+  exit(0);
 }
 
 Error DataAggregator::readProfile(BinaryContext &BC) {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [BOLT] Build heatmap with pre-aggregated data (PR #138798)

2025-05-10 Thread Amir Ayupov via llvm-branch-commits


https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/138798

>From f6b275f682c598d5c026efcbd348c6e8a35c759b Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Tue, 6 May 2025 20:09:58 -0700
Subject: [PATCH 1/2] keep parsing build-id

Created using spr 1.3.4
---
 bolt/lib/Profile/DataAggregator.cpp | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp 
b/bolt/lib/Profile/DataAggregator.cpp
index c5b9696dc79d0..a5ac87ee781b2 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -450,6 +450,14 @@ int DataAggregator::prepareToParse(StringRef Name, 
PerfProcessInfo &Process,
 Error DataAggregator::preprocessProfile(BinaryContext &BC) {
   this->BC = &BC;
 
+  if (std::optional FileBuildID = BC.getFileBuildID()) {
+outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
+processFileBuildID(*FileBuildID);
+  } else {
+errs() << "BOLT-WARNING: build-id will not be checked because we could "
+  "not read one from input binary\n";
+  }
+
   auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) {
 errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf;
 exit(1);
@@ -468,14 +476,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) 
{
 goto heatmap;
   }
 
-  if (std::optional FileBuildID = BC.getFileBuildID()) {
-outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
-processFileBuildID(*FileBuildID);
-  } else {
-errs() << "BOLT-WARNING: build-id will not be checked because we could "
-  "not read one from input binary\n";
-  }
-
   if (BC.IsLinuxKernel) {
 // Current MMap parsing logic does not work with linux kernel.
 // MMap entries for linux kernel uses PERF_RECORD_MMAP

>From 47f76b8dff0f639cac4b205de86224bfa50aa430 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Fri, 9 May 2025 15:01:28 -0700
Subject: [PATCH 2/2] simplify preprocessProfile

Created using spr 1.3.4
---
 bolt/lib/Profile/DataAggregator.cpp | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/bolt/lib/Profile/DataAggregator.cpp 
b/bolt/lib/Profile/DataAggregator.cpp
index c5b9696dc79d0..a259292bd0a29 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -518,13 +518,12 @@ Error DataAggregator::preprocessProfile(BinaryContext 
&BC) {
   deleteTempFiles();
 
 heatmap:
-  if (opts::HeatmapMode) {
-if (std::error_code EC = printLBRHeatMap())
-  return errorCodeToError(EC);
-exit(0);
-  }
+  if (!opts::HeatmapMode)
+return Error::success();
 
-  return Error::success();
+  if (std::error_code EC = printLBRHeatMap())
+return errorCodeToError(EC);
+  exit(0);
 }
 
 Error DataAggregator::readProfile(BinaryContext &BC) {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120386

>From e9066eae746ce9eab372b247aea81e33accb2114 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:38:23 -0500
Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG

When lowering atomic <1 x T> vector types with floats, selection can fail since
this pattern is unsupported. To support this, floats can be cast to
an integer type of the same size.

commit-id:f9d761c5
---
 llvm/lib/Target/X86/X86ISelLowering.cpp|  4 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++
 2 files changed, 41 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 231b677c300a1..20d400c669693 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const 
X86TargetMachine &TM,
 setOperationAction(Op, MVT::f32, Promote);
   }
 
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
+
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index d23cfb89f9fc8..6efcbb80c0ce6 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   %ret = load atomic <1 x i64>, ptr %x acquire, align 8
   ret <1 x i64> %ret
 }
+
+define <1 x half> @atomic_vec1_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %cx
+; CHECK0-NEXT:## implicit-def: $eax
+; CHECK0-NEXT:movw %cx, %ax
+; CHECK0-NEXT:## implicit-def: $xmm0
+; CHECK0-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x half>, ptr %x acquire, align 2
+  ret <1 x half> %ret
+}
+
+define <1 x float> @atomic_vec1_float(ptr %x) {
+; CHECK-LABEL: atomic_vec1_float:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x float>, ptr %x acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_double_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 8
+  ret <1 x double> %ret
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120598

>From bd488e460c7709c87e7a5bf82613dac9645b4b04 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 11:19:39 -0500
Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load

Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size.

commit-id:2894ccd1
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  | 97 ++-
 llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++
 3 files changed, 153 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 89ea7ef4dbe89..bdfa5f7741ad3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8eee7a4c61fe6..f88b4d5693979 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, 
unsigned ResNo) {
 break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+Res = WidenVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:  Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -6014,6 +6017,74 @@ SDValue 
DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  N->getOperand(1), N->getOperand(2));
 }
 
+/// Either return the same load or provide appropriate casts
+/// from the load and return that.
+static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+ TypeSize LdWidth, TypeSize FirstVTWidth,
+ SDLoc dl, SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+unsigned NumElts =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  }
+  assert(FirstVT == WidenVT);
+  return LdOp;
+}
+
+static std::optional findMemType(SelectionDAG &DAG,
+  const TargetLowering &TLI, unsigned 
Width,
+  EVT WidenVT, unsigned Align,
+  unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+  TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
+ "Must be scalable");
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
+ "Expected equivalent element types");
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+
+  // Find the vector type that can load from.
+  std::optional FirstVT =
+  findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
+  WidthDiff.getKnownMinValue());
+
+  if (!FirstVT)
+return SDValue();
+
+  SmallVector MemVTs;
+  TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+  SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
+   Chain, BasePtr, LD->getMemOperand());
+
+  // Load the element with one instruction.
+  SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
+ FirstVTWidth, dl, DAG);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new

[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120598

>From bd488e460c7709c87e7a5bf82613dac9645b4b04 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 11:19:39 -0500
Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load

Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size.

commit-id:2894ccd1
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  | 97 ++-
 llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++
 3 files changed, 153 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 89ea7ef4dbe89..bdfa5f7741ad3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8eee7a4c61fe6..f88b4d5693979 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, 
unsigned ResNo) {
 break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+Res = WidenVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:  Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -6014,6 +6017,74 @@ SDValue 
DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
  N->getOperand(1), N->getOperand(2));
 }
 
+/// Either return the same load or provide appropriate casts
+/// from the load and return that.
+static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
+ TypeSize LdWidth, TypeSize FirstVTWidth,
+ SDLoc dl, SelectionDAG &DAG) {
+  assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  if (!FirstVT.isVector()) {
+unsigned NumElts =
+WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
+EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
+SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+  }
+  assert(FirstVT == WidenVT);
+  return LdOp;
+}
+
+static std::optional findMemType(SelectionDAG &DAG,
+  const TargetLowering &TLI, unsigned 
Width,
+  EVT WidenVT, unsigned Align,
+  unsigned WidenEx);
+
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
+  EVT WidenVT =
+  TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  EVT LdVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
+  assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
+ "Must be scalable");
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
+ "Expected equivalent element types");
+
+  // Load information
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  TypeSize LdWidth = LdVT.getSizeInBits();
+  TypeSize WidenWidth = WidenVT.getSizeInBits();
+  TypeSize WidthDiff = WidenWidth - LdWidth;
+
+  // Find the vector type that can load from.
+  std::optional FirstVT =
+  findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
+  WidthDiff.getKnownMinValue());
+
+  if (!FirstVT)
+return SDValue();
+
+  SmallVector MemVTs;
+  TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+  SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
+   Chain, BasePtr, LD->getMemOperand());
+
+  // Load the element with one instruction.
+  SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
+ FirstVTWidth, dl, DAG);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new

[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120386

>From e9066eae746ce9eab372b247aea81e33accb2114 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:38:23 -0500
Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG

When lowering atomic <1 x T> vector types with floats, selection can fail since
this pattern is unsupported. To support this, floats can be cast to
an integer type of the same size.

commit-id:f9d761c5
---
 llvm/lib/Target/X86/X86ISelLowering.cpp|  4 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++
 2 files changed, 41 insertions(+)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 231b677c300a1..20d400c669693 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const 
X86TargetMachine &TM,
 setOperationAction(Op, MVT::f32, Promote);
   }
 
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
+  setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
+
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index d23cfb89f9fc8..6efcbb80c0ce6 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   %ret = load atomic <1 x i64>, ptr %x acquire, align 8
   ret <1 x i64> %ret
 }
+
+define <1 x half> @atomic_vec1_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %cx
+; CHECK0-NEXT:## implicit-def: $eax
+; CHECK0-NEXT:movw %cx, %ax
+; CHECK0-NEXT:## implicit-def: $xmm0
+; CHECK0-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x half>, ptr %x acquire, align 2
+  ret <1 x half> %ret
+}
+
+define <1 x float> @atomic_vec1_float(ptr %x) {
+; CHECK-LABEL: atomic_vec1_float:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x float>, ptr %x acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec1_double_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 8
+  ret <1 x double> %ret
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120385

>From 6b14da3539e6040f2da1c49cf35c84ef0a2b840d Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:37:17 -0500
Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load

`load atomic <1 x T>` is not valid. This change legalizes
vector types of atomic load via scalarization in SelectionDAG
so that it can, for example, translate from `v1i32` to `i32`.

commit-id:5c36cc8c
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  15 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +-
 3 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 720393158aa5e..89ea7ef4dbe89 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N);
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
+  SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
   SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
   SDValue ScalarizeVecRes_VSELECT(SDNode *N);
   SDValue ScalarizeVecRes_SELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d0b69b88748a9..8eee7a4c61fe6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, 
unsigned ResNo) {
 R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
 break;
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+R = ScalarizeVecRes_ATOMIC_LOAD(cast(N));
+break;
   case ISD::LOAD:   R = 
ScalarizeVecRes_LOAD(cast(N));break;
   case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -458,6 +461,18 @@ SDValue 
DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
   return Op;
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+  SDValue Result = DAG.getAtomicLoad(
+  ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
+  N->getValueType(0).getVectorElementType(), N->getChain(), 
N->getBasePtr(),
+  N->getMemOperand());
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
   assert(N->isUnindexed() && "Indexed vector load?");
 
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 5bce4401f7bdb..d23cfb89f9fc8 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | 
FileCheck %s --check-prefixes=CHECK,CHECK3
+; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | 
FileCheck %s --check-prefixes=CHECK,CHECK0
 
 define void @test1(ptr %ptr, i32 %val1) {
 ; CHECK-LABEL: test1:
@@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) {
   %val = load atomic i32, ptr %ptr seq_cst, align 4
   ret i32 %val
 }
+
+define <1 x i32> @atomic_vec1_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i32:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:movl (%rdi), %eax
+; CHECK-NEXT:retq
+  %ret = load atomic <1 x i32>, ptr %x acquire, align 4
+  ret <1 x i32> %ret
+}
+
+define <1 x i8> @atomic_vec1_i8(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i8:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzbl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i8:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movb (%rdi), %al
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i8>, ptr %x acquire, align 1
+  ret <1 x i8> %ret
+}
+
+define <1 x i16> @atomic_vec1_i16(ptr %x) {
+; CHECK3-LABEL: atomic_vec1_i16:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movzwl (%rdi), %eax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i16:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movw (%rdi), %ax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i16>, ptr %x acquire, align 2
+  ret <1 x i16> %ret
+}
+
+define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
+; CHECK3-LABEL: atomic_ve

[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120716

>From ce64f048fb5324e5b2ddd0e7198e2fb400a62d8e Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 20 Dec 2024 06:14:28 -0500
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector

AtomicExpand fails for aligned `load atomic <n x T>` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.

commit-id:f430c1af
---
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 -
 llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 30 +
 .../X86/expand-atomic-non-integer.ll  | 65 +++
 4 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index c376de877ac7d..70f59eafc6ecb 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
 I->replaceAllUsesWith(V);
   } else if (HasResult) {
 Value *V;
-if (UseSizedLibcall)
-  V = Builder.CreateBitOrPointerCast(Result, I->getType());
-else {
+if (UseSizedLibcall) {
+  // Add bitcasts from Result's scalar type to I's  vector type
+  auto *PtrTy = dyn_cast(I->getType()->getScalarType());
+  auto *VTy = dyn_cast(I->getType());
+  if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
+unsigned AS = PtrTy->getAddressSpace();
+Value *BC = Builder.CreateBitCast(
+Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
+V = Builder.CreateIntToPtr(BC, I->getType());
+  } else
+V = Builder.CreateBitOrPointerCast(Result, I->getType());
+} else {
   V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
 AllocaAlignment);
   Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..eaa2ffd9b2731 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double 
%val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:   @ %bb.0:
+; ARM-NEXT:ldr r0, [r0]
+; ARM-NEXT:dmb ish
+; ARM-NEXT:bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:   @ %bb.0:
+; ARMOPTNONE-NEXT:ldr r0, [r0]
+; ARMOPTNONE-NEXT:dmb ish
+; ARMOPTNONE-NEXT:bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:   @ %bb.0:
+; THUMBTWO-NEXT:ldr r0, [r0]
+; THUMBTWO-NEXT:dmb ish
+; THUMBTWO-NEXT:bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:   @ %bb.0:
+; THUMBONE-NEXT:push {r7, lr}
+; THUMBONE-NEXT:movs r1, #0
+; THUMBONE-NEXT:mov r2, r1
+; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:   @ %bb.0:
+; ARMV4-NEXT:push {r11, lr}
+; ARMV4-NEXT:mov r1, #2
+; ARMV4-NEXT:bl __atomic_load_4
+; ARMV4-NEXT:pop {r11, lr}
+; ARMV4-NEXT:mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:   @ %bb.0:
+; ARMV6-NEXT:ldr r0, [r0]
+; ARMV6-NEXT:mov r1, #0
+; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:   @ %bb.0:
+; THUMBM-NEXT:ldr r0, [r0]
+; THUMBM-NEXT:dmb sy
+; THUMBM-NEXT:bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index f72970d12b6eb..d3027e799 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -382,6 +382,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_ptr_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:popq %rax
+; CHECK-NEXT:retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
 ; CHECK3-LABEL: atomic_vec4_i8:
 ; CHECK3:   ## %bb.0:
@@ -405,6 +420,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
   ret <4 x i16> %ret
 }
 
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_ptr270:
+; CHECK:   ## %b

[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120387

>From d02434d4ca3ec809d7a72862408bb569f4043939 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Wed, 18 Dec 2024 03:40:32 -0500
Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes.

Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.

commit-id:a06a5cc6
---
 llvm/test/CodeGen/X86/atomic-load-store.ll | 253 +
 1 file changed, 253 insertions(+)

diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 6efcbb80c0ce6..39e9fdfa5e62b 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   ret <1 x i64> %ret
 }
 
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_ptr:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_ptr:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
+
 define <1 x half> @atomic_vec1_half(ptr %x) {
 ; CHECK3-LABEL: atomic_vec1_half:
 ; CHECK3:   ## %bb.0:
@@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) 
nounwind {
   %ret = load atomic <1 x double>, ptr %x acquire, align 8
   ret <1 x double> %ret
 }
+
+define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_i64:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movq (%rsp), %rax
+; CHECK3-NEXT:popq %rcx
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_i64:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq (%rsp), %rax
+; CHECK0-NEXT:popq %rcx
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+  ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec1_double:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec1_double:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_i32:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:pushq %rax
+; CHECK3-NEXT:movq %rdi, %rsi
+; CHECK3-NEXT:movq %rsp, %rdx
+; CHECK3-NEXT:movl $8, %edi
+; CHECK3-NEXT:movl $2, %ecx
+; CHECK3-NEXT:callq ___atomic_load
+; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK3-NEXT:popq %rax
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_i32:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:pushq %rax
+; CHECK0-NEXT:movq %rdi, %rsi
+; CHECK0-NEXT:movl $8, %edi
+; CHECK0-NEXT:movq %rsp, %rdx
+; CHECK0-NEXT:movl $2, %ecx
+; CHECK0-NEXT:callq ___atomic_load
+; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:popq %rax
+; CHECK0-NEXT:retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+  ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_float_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[

[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120640

>From 40b0a4ee9e008eeb749c49851bc52a66809a70a0 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Thu, 19 Dec 2024 16:25:55 -0500
Subject: [PATCH] [SelectionDAG] Split vector types for atomic load

Vector types that aren't widened are split
so that a single ATOMIC_LOAD is issued for the entire vector at once.
This change utilizes the load vectorization infrastructure in
SelectionDAG in order to group the vectors. This enables SelectionDAG
to translate vectors of type bfloat and half.

commit-id:3a045357
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp  |  37 
 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++
 3 files changed, 209 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bdfa5f7741ad3..d8f402f529632 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f88b4d5693979..a3b30943c8e7d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SplitVecRes_STEP_VECTOR(N, Lo, Hi);
 break;
   case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::ATOMIC_LOAD:
+SplitVecRes_ATOMIC_LOAD(cast(N), Lo, Hi);
+break;
   case ISD::LOAD:
 SplitVecRes_LOAD(cast(N), Lo, Hi);
 break;
@@ -1421,6 +1424,40 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, 
unsigned ResNo) {
 SetSplitVector(SDValue(N, ResNo), Lo, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
+   SDValue &Hi) {
+  assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
+ "Extended load during type legalization!");
+  SDLoc dl(LD);
+  EVT VT = LD->getValueType(0);
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+  SDValue Ch = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+
+  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+  EVT MemIntVT =
+  EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits());
+  SDValue ALD = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, MemIntVT, IntVT, Ch,
+  Ptr, LD->getMemOperand());
+
+  EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+  EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+  SDValue ExtractLo = DAG.getNode(ISD::TRUNCATE, dl, LoIntVT, ALD);
+  SDValue ExtractHi =
+  DAG.getNode(ISD::SRL, dl, IntVT, ALD,
+  DAG.getIntPtrConstant(VT.getSizeInBits() / 2, dl));
+  ExtractHi = DAG.getNode(ISD::TRUNCATE, dl, HiIntVT, ExtractHi);
+
+  Lo = DAG.getBitcast(LoVT, ExtractLo);
+  Hi = DAG.getBitcast(HiVT, ExtractHi);
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1));
+}
+
 void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
 MachinePointerInfo &MPI, SDValue &Ptr,
 uint64_t *ScaledOffset) {
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3cf9e3c1a8dfa..6e2e9d4b21891 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -205,6 +205,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
   ret <2 x float> %ret
 }
 
+define <2 x half> @atomic_vec2_half(ptr %x) {
+; CHECK3-LABEL: atomic_vec2_half:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movl (%rdi), %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT:shrl $16, %eax
+; CHECK3-NEXT:pinsrw $0, %eax, %xmm1
+; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_half:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movl (%rdi), %eax
+; CHECK0-NEXT:movl %eax, %ecx
+; CHECK0-NEXT:shrl

[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/120716

>From ce64f048fb5324e5b2ddd0e7198e2fb400a62d8e Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 20 Dec 2024 06:14:28 -0500
Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector

AtomicExpand fails for aligned `load atomic <n x T>` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.

commit-id:f430c1af
---
 llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 -
 llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++
 llvm/test/CodeGen/X86/atomic-load-store.ll| 30 +
 .../X86/expand-atomic-non-integer.ll  | 65 +++
 4 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index c376de877ac7d..70f59eafc6ecb 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
 I->replaceAllUsesWith(V);
   } else if (HasResult) {
 Value *V;
-if (UseSizedLibcall)
-  V = Builder.CreateBitOrPointerCast(Result, I->getType());
-else {
+if (UseSizedLibcall) {
+      // Add bitcasts from Result's scalar type to I's <n x ptr> vector type
+  auto *PtrTy = dyn_cast(I->getType()->getScalarType());
+  auto *VTy = dyn_cast(I->getType());
+  if (VTy && PtrTy && !Result->getType()->isVectorTy()) {
+unsigned AS = PtrTy->getAddressSpace();
+Value *BC = Builder.CreateBitCast(
+Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS)));
+V = Builder.CreateIntToPtr(BC, I->getType());
+  } else
+V = Builder.CreateBitOrPointerCast(Result, I->getType());
+} else {
   V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
 AllocaAlignment);
   Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..eaa2ffd9b2731 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double 
%val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:   @ %bb.0:
+; ARM-NEXT:ldr r0, [r0]
+; ARM-NEXT:dmb ish
+; ARM-NEXT:bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:   @ %bb.0:
+; ARMOPTNONE-NEXT:ldr r0, [r0]
+; ARMOPTNONE-NEXT:dmb ish
+; ARMOPTNONE-NEXT:bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:   @ %bb.0:
+; THUMBTWO-NEXT:ldr r0, [r0]
+; THUMBTWO-NEXT:dmb ish
+; THUMBTWO-NEXT:bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:   @ %bb.0:
+; THUMBONE-NEXT:push {r7, lr}
+; THUMBONE-NEXT:movs r1, #0
+; THUMBONE-NEXT:mov r2, r1
+; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:   @ %bb.0:
+; ARMV4-NEXT:push {r11, lr}
+; ARMV4-NEXT:mov r1, #2
+; ARMV4-NEXT:bl __atomic_load_4
+; ARMV4-NEXT:pop {r11, lr}
+; ARMV4-NEXT:mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:   @ %bb.0:
+; ARMV6-NEXT:ldr r0, [r0]
+; ARMV6-NEXT:mov r1, #0
+; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:   @ %bb.0:
+; THUMBM-NEXT:ldr r0, [r0]
+; THUMBM-NEXT:dmb sy
+; THUMBM-NEXT:bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index f72970d12b6eb..d3027e799 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -382,6 +382,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_ptr_align:
+; CHECK:   ## %bb.0:
+; CHECK-NEXT:pushq %rax
+; CHECK-NEXT:movl $2, %esi
+; CHECK-NEXT:callq ___atomic_load_16
+; CHECK-NEXT:movq %rdx, %xmm1
+; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:popq %rax
+; CHECK-NEXT:retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
 ; CHECK3-LABEL: atomic_vec4_i8:
 ; CHECK3:   ## %bb.0:
@@ -405,6 +420,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
   ret <4 x i16> %ret
 }
 
+define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_ptr270:
+; CHECK:   ## %b

[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/125432

>From bf8fc80f870022c2a42d01a500e2b16d648dd376 Mon Sep 17 00:00:00 2001
From: jofrn 
Date: Fri, 31 Jan 2025 13:12:56 -0500
Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic
 vector.

After splitting, all elements are created. The two components must
be found by looking at the upper and lower half of EXTRACT_ELEMENT.
This change extends EltsFromConsecutiveLoads
to understand AtomicSDNode so that unused elements can be removed.

commit-id:b83937a8
---
 llvm/include/llvm/CodeGen/SelectionDAG.h  |   4 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  20 ++-
 .../SelectionDAGAddressAnalysis.cpp   |  30 ++--
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  59 +--
 llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++
 5 files changed, 90 insertions(+), 172 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 87b6914f8a0ee..ab8bb517e6ae4 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1873,7 +1873,7 @@ class SelectionDAG {
   /// chain to the token factor. This ensures that the new memory node will 
have
   /// the same relative memory dependency position as the old load. Returns the
   /// new merged load chain.
-  SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
+  SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
 
   /// Topological-sort the AllNodes list and a
   /// assign a unique node id for each node in the DAG based on their
@@ -2311,7 +2311,7 @@ class SelectionDAG {
   /// merged. Check that both are nonvolatile and if LD is loading
   /// 'Bytes' bytes from a location that is 'Dist' units away from the
   /// location that the 'Base' load is loading from.
-  bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
+  bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
   unsigned Bytes, int Dist) const;
 
   /// Infer alignment of a load / store address. Return std::nullopt if it
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index bbf1b0fd590ef..38b22078c8c44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12215,7 +12215,7 @@ SDValue 
SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
   return TokenFactor;
 }
 
-SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
+SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
SDValue NewMemOp) {
   assert(isa(NewMemOp.getNode()) && "Expected a memop node");
   SDValue OldChain = SDValue(OldLoad, 1);
@@ -12905,17 +12905,21 @@ std::pair 
SelectionDAG::UnrollVectorOverflowOp(
 getBuildVector(NewOvVT, dl, OvScalars));
 }
 
-bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
-  LoadSDNode *Base,
+bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
+  MemSDNode *Base,
   unsigned Bytes,
   int Dist) const {
   if (LD->isVolatile() || Base->isVolatile())
 return false;
-  // TODO: probably too restrictive for atomics, revisit
-  if (!LD->isSimple())
-return false;
-  if (LD->isIndexed() || Base->isIndexed())
-return false;
+  if (auto Ld = dyn_cast(LD)) {
+if (!Ld->isSimple())
+  return false;
+if (Ld->isIndexed())
+  return false;
+  }
+  if (auto Ld = dyn_cast(Base))
+if (Ld->isIndexed())
+  return false;
   if (LD->getChain() != Base->getChain())
 return false;
   EVT VT = LD->getMemoryVT();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780..c29cb424c7a4c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, 
int64_t BitSize,
 }
 
 /// Parses tree in Ptr for base, index, offset addresses.
-static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
-   const SelectionDAG &DAG) {
+template 
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
   SDValue Ptr = N->getBasePtr();
 
   // (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
   bool IsIndexSignExt = false;
 
   // pre-inc/pre-dec ops are components of EA.
-  if (N->getAddressingMode() == ISD::PRE_INC) {
-if (auto *C = dyn_cast(N->getOffset()))

[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)

2025-05-10 Thread via llvm-branch-commits


https://github.com/jofrn updated 
https://github.com/llvm/llvm-project/pull/138635

>From 539584cc6a26cb072ed8fa199a873256543d301a Mon Sep 17 00:00:00 2001
From: jofernau_amdeng 
Date: Tue, 6 May 2025 01:48:11 -0400
Subject: [PATCH] [X86] Remove extra MOV after widening atomic load

This change adds patterns to optimize out an extra MOV
present after widening the atomic load.

commit-id:45989503
---
 llvm/lib/Target/X86/X86InstrCompiler.td|  7 
 llvm/test/CodeGen/X86/atomic-load-store.ll | 40 --
 llvm/test/CodeGen/X86/atomic-unordered.ll  |  3 +-
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td 
b/llvm/lib/Target/X86/X86InstrCompiler.td
index efa1e8bd7f3e3..786d0567280f9 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1204,6 +1204,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), 
(MOV16rm addr:$src)>;
 def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
 def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;
 
+def : Pat<(v4i32 (scalar_to_vector (i32 (zext (i16 (atomic_load_16 
addr:$src)),
+   (MOVDI2PDIrm addr:$src)>;   // load atomic <2 x i8>
+def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src,
+   (MOVDI2PDIrm addr:$src)>;   // load atomic <2 x i16>
+def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src,
+   (MOV64toPQIrm  addr:$src)>; // load atomic <2 x i32,float>
+
 // Floating point loads/stores.
 def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
   (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll 
b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 9ee8b4fc5ac7f..3cf9e3c1a8dfa 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -165,11 +165,15 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) {
 }
 
 define <2 x i16> @atomic_vec2_i16(ptr %x) {
-; CHECK-LABEL: atomic_vec2_i16:
-; CHECK:   ## %bb.0:
-; CHECK-NEXT:movl (%rdi), %eax
-; CHECK-NEXT:movd %eax, %xmm0
-; CHECK-NEXT:retq
+; CHECK3-LABEL: atomic_vec2_i16:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec2_i16:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK0-NEXT:retq
   %ret = load atomic <2 x i16>, ptr %x acquire, align 4
   ret <2 x i16> %ret
 }
@@ -177,8 +181,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) {
 define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) {
 ; CHECK-LABEL: atomic_vec2_ptr270:
 ; CHECK:   ## %bb.0:
-; CHECK-NEXT:movq (%rdi), %rax
-; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:movq (%rdi), %xmm0
 ; CHECK-NEXT:retq
   %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8
   ret <2 x ptr addrspace(270)> %ret
@@ -187,8 +190,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) 
{
 define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
 ; CHECK-LABEL: atomic_vec2_i32_align:
 ; CHECK:   ## %bb.0:
-; CHECK-NEXT:movq (%rdi), %rax
-; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:movq (%rdi), %xmm0
 ; CHECK-NEXT:retq
   %ret = load atomic <2 x i32>, ptr %x acquire, align 8
   ret <2 x i32> %ret
@@ -197,8 +199,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
 define <2 x float> @atomic_vec2_float_align(ptr %x) {
 ; CHECK-LABEL: atomic_vec2_float_align:
 ; CHECK:   ## %bb.0:
-; CHECK-NEXT:movq (%rdi), %rax
-; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:movq (%rdi), %xmm0
 ; CHECK-NEXT:retq
   %ret = load atomic <2 x float>, ptr %x acquire, align 8
   ret <2 x float> %ret
@@ -354,11 +355,15 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
 }
 
 define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
-; CHECK-LABEL: atomic_vec4_i8:
-; CHECK:   ## %bb.0:
-; CHECK-NEXT:movl (%rdi), %eax
-; CHECK-NEXT:movd %eax, %xmm0
-; CHECK-NEXT:retq
+; CHECK3-LABEL: atomic_vec4_i8:
+; CHECK3:   ## %bb.0:
+; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK3-NEXT:retq
+;
+; CHECK0-LABEL: atomic_vec4_i8:
+; CHECK0:   ## %bb.0:
+; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK0-NEXT:retq
   %ret = load atomic <4 x i8>, ptr %x acquire, align 4
   ret <4 x i8> %ret
 }
@@ -366,8 +371,7 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
 define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
 ; CHECK-LABEL: atomic_vec4_i16:
 ; CHECK:   ## %bb.0:
-; CHECK-NEXT:movq (%rdi), %rax
-; CHECK-NEXT:movq %rax, %xmm0
+; CHECK-NEXT:movq (%rdi), %xmm0
 ; CHECK-NEXT:retq
   %ret = load atomic <4 x i16>, ptr %x acquire, align 8
   ret <4 x i16> %ret
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll 
b/llvm/test/CodeGen/X86/atomic-unorder

1 2 >

1 - 100 of 117 matches

Mail list logo