[llvm-branch-commits] [Github][CI] Make the new premerge system fail jobs on errors (PR #139359)
https://github.com/joker-eph approved this pull request. https://github.com/llvm/llvm-project/pull/139359 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Github][CI] Make the new premerge system fail jobs on errors (PR #139359)
https://github.com/tstellar approved this pull request. LGTM. https://github.com/llvm/llvm-project/pull/139359 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: Fix crash lowering stack guard on OpenBSD/aarch64. (#125416) (PR #136458)
3405691582 wrote: > @brad0 (or anyone else). If you would like to add a note about this fix in > the release notes (completely optional). Please reply to this comment with a > one or two sentence description of the fix. When you are done, please add the > release:note label to this PR. Fixed compiler crash on OpenBSD/aarch64 when using stack protection by ensuring platform-specific guard variables are consistently referenced. https://github.com/llvm/llvm-project/pull/136458 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) (PR #139388)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/139388 Backport b3d2dc321c5c78b7204696afe07fe6ef3375acfd Requested by: @tstellar >From ff4132ec328ed80be247856939dbf7345106cc55 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Fri, 18 Apr 2025 09:12:52 -0700 Subject: [PATCH] [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) We can't assume MBBI is still pointing at MBB if we've already expanded a probe. We need to re-query the MBB from MBBI. Fixes #135206 Co-authored-by: Craig Topper (cherry picked from commit b3d2dc321c5c78b7204696afe07fe6ef3375acfd) --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 8 +- llvm/test/CodeGen/RISCV/pr135206.ll | 84 2 files changed, 89 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/pr135206.ll diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index bb2e5781c34db..6f4c1e16190f4 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -2135,11 +2135,13 @@ TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const { } // Synthesize the probe loop. 
-static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, +static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL, Register TargetReg, bool IsRVV) { assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP"); + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineFunction &MF = *MBB.getParent(); + auto &Subtarget = MF.getSubtarget(); const RISCVInstrInfo *TII = Subtarget.getInstrInfo(); bool IsRV64 = Subtarget.is64Bit(); @@ -2228,7 +2230,7 @@ void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MI->getIterator(); DebugLoc DL = MBB.findDebugLoc(MBBI); Register TargetReg = MI->getOperand(1).getReg(); - emitStackProbeInline(MF, MBB, MBBI, DL, TargetReg, + emitStackProbeInline(MBBI, DL, TargetReg, (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV)); MBBI->eraseFromParent(); } diff --git a/llvm/test/CodeGen/RISCV/pr135206.ll b/llvm/test/CodeGen/RISCV/pr135206.ll new file mode 100644 index 0..196e78d8ed8b9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr135206.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple riscv64 < %s -o - | FileCheck %s + +%"buff" = type { [4096 x i64] } + +declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) +declare void @bar() + +define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT:addi sp, sp, -2032 +; CHECK-NEXT:sd ra, 2024(sp) # 8-byte Folded Spill +; CHECK-NEXT:sd s0, 2016(sp) # 8-byte Folded Spill +; CHECK-NEXT:sd s1, 2008(sp) # 8-byte Folded Spill +; CHECK-NEXT:sd s2, 2000(sp) # 8-byte Folded Spill +; CHECK-NEXT:sd s3, 1992(sp) # 8-byte Folded Spill +; CHECK-NEXT:lui a0, 7 +; CHECK-NEXT:sub t1, sp, a0 +; CHECK-NEXT:lui t2, 1 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:sub sp, sp, t2 +; 
CHECK-NEXT:sd zero, 0(sp) +; CHECK-NEXT:bne sp, t1, .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT:addi sp, sp, -2048 +; CHECK-NEXT:addi sp, sp, -96 +; CHECK-NEXT:csrr t1, vlenb +; CHECK-NEXT:lui t2, 1 +; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:sub sp, sp, t2 +; CHECK-NEXT:sd zero, 0(sp) +; CHECK-NEXT:sub t1, t1, t2 +; CHECK-NEXT:bge t1, t2, .LBB0_3 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT:sub sp, sp, t1 +; CHECK-NEXT:li a0, 86 +; CHECK-NEXT:addi s0, sp, 48 +; CHECK-NEXT:addi s1, sp, 32 +; CHECK-NEXT:addi s2, sp, 16 +; CHECK-NEXT:lui a1, 353637 +; CHECK-NEXT:vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT:vmv.v.x v8, a0 +; CHECK-NEXT:lui a0, 8 +; CHECK-NEXT:addiw a0, a0, 32 +; CHECK-NEXT:add a0, sp, a0 +; CHECK-NEXT:vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-NEXT:addiw a0, a1, 1622 +; CHECK-NEXT:vse8.v v8, (s0) +; CHECK-NEXT:vse8.v v8, (s1) +; CHECK-NEXT:vse8.v v8, (s2) +; CHECK-NEXT:slli a1, a0, 32 +; CHECK-NEXT:add s3, a0, a1 +; CHECK-NEXT:sd s3, 64(sp) +; CHECK-NEXT:call bar +; CHECK-NEXT:lui a0, 8 +; CHECK-NEXT:addiw a0, a0, 32 +; CHECK-NEXT:add a0, sp, a0 +; CHECK-NEXT:vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-NEXT:vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT:vse8.v v8, (s0) +; CHECK-NEXT:vse8.v v8, (s1) +; CHECK-NEXT:vse8.v v8, (s2) +; CHECK-NEXT:sd s3, 64(sp) +; CHECK-NEXT:li a0, 0 +; CHECK-NEX
[llvm-branch-commits] [llvm] release/20.x: [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) (PR #139388)
llvmbot wrote: @llvm/pr-subscribers-backend-risc-v Author: None (llvmbot) Changes Backport b3d2dc321c5c78b7204696afe07fe6ef3375acfd Requested by: @tstellar --- Full diff: https://github.com/llvm/llvm-project/pull/139388.diff 2 Files Affected: - (modified) llvm/lib/Target/RISCV/RISCVFrameLowering.cpp (+5-3) - (added) llvm/test/CodeGen/RISCV/pr135206.ll (+84) ``diff diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index bb2e5781c34db..6f4c1e16190f4 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -2135,11 +2135,13 @@ TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const { } // Synthesize the probe loop. -static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, +static void emitStackProbeInline(MachineBasicBlock::iterator MBBI, DebugLoc DL, Register TargetReg, bool IsRVV) { assert(TargetReg != RISCV::X2 && "New top of stack cannot already be in SP"); + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineFunction &MF = *MBB.getParent(); + auto &Subtarget = MF.getSubtarget(); const RISCVInstrInfo *TII = Subtarget.getInstrInfo(); bool IsRV64 = Subtarget.is64Bit(); @@ -2228,7 +2230,7 @@ void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MI->getIterator(); DebugLoc DL = MBB.findDebugLoc(MBBI); Register TargetReg = MI->getOperand(1).getReg(); - emitStackProbeInline(MF, MBB, MBBI, DL, TargetReg, + emitStackProbeInline(MBBI, DL, TargetReg, (MI->getOpcode() == RISCV::PROBED_STACKALLOC_RVV)); MBBI->eraseFromParent(); } diff --git a/llvm/test/CodeGen/RISCV/pr135206.ll b/llvm/test/CodeGen/RISCV/pr135206.ll new file mode 100644 index 0..196e78d8ed8b9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr135206.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; 
RUN: llc -mtriple riscv64 < %s -o - | FileCheck %s + +%"buff" = type { [4096 x i64] } + +declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) +declare void @bar() + +define i1 @foo() nounwind "probe-stack"="inline-asm" "target-features"="+v" { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT:addi sp, sp, -2032 +; CHECK-NEXT:sd ra, 2024(sp) # 8-byte Folded Spill +; CHECK-NEXT:sd s0, 2016(sp) # 8-byte Folded Spill +; CHECK-NEXT:sd s1, 2008(sp) # 8-byte Folded Spill +; CHECK-NEXT:sd s2, 2000(sp) # 8-byte Folded Spill +; CHECK-NEXT:sd s3, 1992(sp) # 8-byte Folded Spill +; CHECK-NEXT:lui a0, 7 +; CHECK-NEXT:sub t1, sp, a0 +; CHECK-NEXT:lui t2, 1 +; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:sub sp, sp, t2 +; CHECK-NEXT:sd zero, 0(sp) +; CHECK-NEXT:bne sp, t1, .LBB0_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT:addi sp, sp, -2048 +; CHECK-NEXT:addi sp, sp, -96 +; CHECK-NEXT:csrr t1, vlenb +; CHECK-NEXT:lui t2, 1 +; CHECK-NEXT: .LBB0_3: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT:sub sp, sp, t2 +; CHECK-NEXT:sd zero, 0(sp) +; CHECK-NEXT:sub t1, t1, t2 +; CHECK-NEXT:bge t1, t2, .LBB0_3 +; CHECK-NEXT: # %bb.4: +; CHECK-NEXT:sub sp, sp, t1 +; CHECK-NEXT:li a0, 86 +; CHECK-NEXT:addi s0, sp, 48 +; CHECK-NEXT:addi s1, sp, 32 +; CHECK-NEXT:addi s2, sp, 16 +; CHECK-NEXT:lui a1, 353637 +; CHECK-NEXT:vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT:vmv.v.x v8, a0 +; CHECK-NEXT:lui a0, 8 +; CHECK-NEXT:addiw a0, a0, 32 +; CHECK-NEXT:add a0, sp, a0 +; CHECK-NEXT:vs1r.v v8, (a0) # vscale x 8-byte Folded Spill +; CHECK-NEXT:addiw a0, a1, 1622 +; CHECK-NEXT:vse8.v v8, (s0) +; CHECK-NEXT:vse8.v v8, (s1) +; CHECK-NEXT:vse8.v v8, (s2) +; CHECK-NEXT:slli a1, a0, 32 +; CHECK-NEXT:add s3, a0, a1 +; CHECK-NEXT:sd s3, 64(sp) +; CHECK-NEXT:call bar +; CHECK-NEXT:lui a0, 8 +; CHECK-NEXT:addiw a0, a0, 32 +; CHECK-NEXT:add a0, sp, a0 +; CHECK-NEXT:vl1r.v v8, (a0) # vscale x 8-byte Folded Reload +; CHECK-NEXT:vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT:vse8.v v8, 
(s0) +; CHECK-NEXT:vse8.v v8, (s1) +; CHECK-NEXT:vse8.v v8, (s2) +; CHECK-NEXT:sd s3, 64(sp) +; CHECK-NEXT:li a0, 0 +; CHECK-NEXT:csrr a1, vlenb +; CHECK-NEXT:add sp, sp, a1 +; CHECK-NEXT:lui a1, 8 +; CHECK-NEXT:addiw a1, a1, -1952 +; CHECK-NEXT:add sp, sp, a1 +; CHECK-NEXT:ld ra, 2024(sp) # 8-byte Folded Reload +; CHECK-NEXT:ld s0, 2016(sp) # 8-byte Folded Reload +; CHECK-NEXT:ld s1, 2008(sp) # 8-byte Folded Reload +; CHECK-NEXT:ld s2, 2000(sp) # 8-byte Folded Reload +; CHECK-NEXT:ld s3, 1992(sp) # 8-byte Folded Reload +; CHECK-NEXT:addi sp,
[llvm-branch-commits] [llvm] release/20.x: [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) (PR #139388)
llvmbot wrote: @preames What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/139388 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) (PR #139388)
eyestoreoye wrote: Very clean code with good test coverage. This is a great example of best practices. https://github.com/llvm/llvm-project/pull/139388 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
https://github.com/anutosh491 updated https://github.com/llvm/llvm-project/pull/137620 [Patch not included: GitHub returned its "Rate limit" error page instead of the diff. The page read: "Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour." Contact Support: https://support.github.com/contact — GitHub Status: https://githubstatus.com — @githubstatus: https://twitter.com/githubstatus] ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: AMDGPU: Stop emitting an error on illegal addrspacecasts (#127487) (PR #127496)
nikic wrote: Closing this as https://github.com/llvm/llvm-project/pull/127751 has landed. https://github.com/llvm/llvm-project/pull/127496 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: AMDGPU: Stop emitting an error on illegal addrspacecasts (#127487) (PR #127496)
https://github.com/nikic closed https://github.com/llvm/llvm-project/pull/127496 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-format] Fix a crash on formatting missing r_pare… (PR #139345)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/139345 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/139389 Backport 801b519dfd01e21da0be17aa8f8dc2ceb0eb9e77 Requested by: @tstellar >From 3632a99ac092c604b374d1f02681465e48d5 Mon Sep 17 00:00:00 2001 From: mojyack <66899529+mojy...@users.noreply.github.com> Date: Fri, 11 Apr 2025 06:23:26 +0900 Subject: [PATCH] [sanitizer_common] Fix build on ppc64+musl (#120036) In powerpc64-unknown-linux-musl, signal.h does not include asm/ptrace.h, which causes "member access into incomplete type 'struct pt_regs'" errors. Include the header explicitly to fix this. Also in sanitizer_linux_libcdep.cpp, there is a usage of TlsPreTcbSize which is not defined in such a platform. Guard the branch with macro. (cherry picked from commit 801b519dfd01e21da0be17aa8f8dc2ceb0eb9e77) --- .../lib/sanitizer_common/sanitizer_linux.cpp| 4 .../sanitizer_common/sanitizer_linux_libcdep.cpp| 13 +++-- .../sanitizer_platform_limits_posix.cpp | 2 +- .../sanitizer_stoptheworld_linux_libcdep.cpp| 3 ++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 7aa48d29d2d53..a4d526b4466c3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -86,6 +86,10 @@ #include # endif +# if SANITIZER_LINUX && defined(__powerpc64__) +#include +# endif + # if SANITIZER_FREEBSD #include #include diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index e11eff13cd326..331e1c7d8d152 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -619,21 +619,22 @@ static void GetTls(uptr *addr, uptr *size) { *addr = tp - RoundUpTo(*size, align); *size = tp - *addr + ThreadDescriptorSize(); # else - if (SANITIZER_GLIBC) -*size += 1664; - else if 
(SANITIZER_FREEBSD) -*size += 128; // RTLD_STATIC_TLS_EXTRA -#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 +#if SANITIZER_GLIBC + *size += 1664; +#elif SANITIZER_FREEBSD + *size += 128; // RTLD_STATIC_TLS_EXTRA +# if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 const uptr pre_tcb_size = TlsPreTcbSize(); *addr -= pre_tcb_size; *size += pre_tcb_size; -#else +# else // arm and aarch64 reserve two words at TP, so this underestimates the range. // However, this is sufficient for the purpose of finding the pointers to // thread-specific data keys. const uptr tcb_size = ThreadDescriptorSize(); *addr -= tcb_size; *size += tcb_size; +# endif #endif # endif #elif SANITIZER_NETBSD diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index a5311d266b0c4..ec5f2edab6a64 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -96,7 +96,7 @@ # include #if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \ defined(__hexagon__) || defined(__loongarch__) || SANITIZER_RISCV64 || \ -defined(__sparc__) +defined(__sparc__) || defined(__powerpc64__) # include # ifdef __arm__ typedef struct user_fpregs elf_fpregset_t; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 945da99d41f4e..58d17d90c343a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -31,7 +31,8 @@ #include // for pid_t #include // for iovec #include // for NT_PRSTATUS -#if (defined(__aarch64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) && \ +#if (defined(__aarch64__) || defined(__powerpc64__) || \ + SANITIZER_RISCV64 || 
SANITIZER_LOONGARCH64) &&\ !SANITIZER_ANDROID // GLIBC 2.20+ sys/user does not include asm/ptrace.h # include ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: None (llvmbot) Changes Backport 801b519dfd01e21da0be17aa8f8dc2ceb0eb9e77 Requested by: @tstellar --- Full diff: https://github.com/llvm/llvm-project/pull/139389.diff 4 Files Affected: - (modified) compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp (+4) - (modified) compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp (+7-6) - (modified) compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp (+1-1) - (modified) compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp (+2-1) ``diff diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 7aa48d29d2d53..a4d526b4466c3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -86,6 +86,10 @@ #include # endif +# if SANITIZER_LINUX && defined(__powerpc64__) +#include +# endif + # if SANITIZER_FREEBSD #include #include diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp index e11eff13cd326..331e1c7d8d152 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -619,21 +619,22 @@ static void GetTls(uptr *addr, uptr *size) { *addr = tp - RoundUpTo(*size, align); *size = tp - *addr + ThreadDescriptorSize(); # else - if (SANITIZER_GLIBC) -*size += 1664; - else if (SANITIZER_FREEBSD) -*size += 128; // RTLD_STATIC_TLS_EXTRA -#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 +#if SANITIZER_GLIBC + *size += 1664; +#elif SANITIZER_FREEBSD + *size += 128; // RTLD_STATIC_TLS_EXTRA +# if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64 const uptr pre_tcb_size = TlsPreTcbSize(); *addr -= pre_tcb_size; *size += pre_tcb_size; -#else +# else // arm and aarch64 reserve two words at TP, so this 
underestimates the range. // However, this is sufficient for the purpose of finding the pointers to // thread-specific data keys. const uptr tcb_size = ThreadDescriptorSize(); *addr -= tcb_size; *size += tcb_size; +# endif #endif # endif #elif SANITIZER_NETBSD diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index a5311d266b0c4..ec5f2edab6a64 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -96,7 +96,7 @@ # include #if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \ defined(__hexagon__) || defined(__loongarch__) || SANITIZER_RISCV64 || \ -defined(__sparc__) +defined(__sparc__) || defined(__powerpc64__) # include # ifdef __arm__ typedef struct user_fpregs elf_fpregset_t; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 945da99d41f4e..58d17d90c343a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -31,7 +31,8 @@ #include // for pid_t #include // for iovec #include // for NT_PRSTATUS -#if (defined(__aarch64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) && \ +#if (defined(__aarch64__) || defined(__powerpc64__) || \ + SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) &&\ !SANITIZER_ANDROID // GLIBC 2.20+ sys/user does not include asm/ptrace.h # include `` https://github.com/llvm/llvm-project/pull/139389 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)
llvmbot wrote: @vitalybuka What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/139389 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/139389 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Fix build on ppc64+musl (#120036) (PR #139389)
github-actions[bot] wrote: ⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo. Please turn off [Keep my email addresses private](https://github.com/settings/emails) setting in your account. See [LLVM Discourse](https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it) for more information. https://github.com/llvm/llvm-project/pull/139389 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [RISCV] Fix assertion failure when using -fstack-clash-protection (#135248) (PR #139388)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/139388 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
anutosh491 wrote: Hey @tstellar, the CI is green. Should be good to go! https://github.com/llvm/llvm-project/pull/137620 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] 0019b7d - [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970)
Author: Anutosh Bhat Date: 2025-05-10T11:12:53-07:00 New Revision: 0019b7d0ae0bcc65af065542fcfb48ea0eb55d38 URL: https://github.com/llvm/llvm-project/commit/0019b7d0ae0bcc65af065542fcfb48ea0eb55d38 DIFF: https://github.com/llvm/llvm-project/commit/0019b7d0ae0bcc65af065542fcfb48ea0eb55d38.diff LOG: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) This change moves WasmSym from a static global struct to an instance owned by Ctx, allowing it to be reset cleanly between linker runs. This enables safe support for multiple invocations of wasm-ld within the same process. Changes done: - Converted WasmSym from a static struct to a regular struct with instance members. - Added a std::unique_ptr<WasmSym> wasmSym field inside Ctx. - Reset wasmSym in Ctx::reset() to clear state between links. - Replaced all WasmSym:: references with ctx.wasmSym->. - Removed global symbol definitions from Symbols.cpp that are no longer needed. Clearing wasmSym in ctx.reset() ensures a clean slate for each link invocation, preventing symbol leakage across runs—critical when using wasm-ld/lld as a reentrant library where global state can cause subtle, hard-to-debug errors. - Co-authored-by: Vassil Vassilev (cherry picked from commit 9cbbb74d370c09e13b8412f21dccb7d2c4afc6a4) Added: Modified: lld/wasm/Config.h lld/wasm/Driver.cpp lld/wasm/InputChunks.cpp lld/wasm/MarkLive.cpp lld/wasm/OutputSections.cpp lld/wasm/Symbols.cpp lld/wasm/Symbols.h lld/wasm/SyntheticSections.cpp lld/wasm/Writer.cpp Removed: diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 1fa6c42d9cd86..527edc11c48e3 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -32,6 +32,11 @@ class InputTable; class InputGlobal; class InputFunction; class Symbol; +class DefinedData; +class GlobalSymbol; +class DefinedFunction; +class UndefinedGlobal; +class TableSymbol; // For --unresolved-symbols. 
enum class UnresolvedPolicy { ReportError, Warn, Ignore, ImportDynamic }; @@ -139,6 +144,107 @@ struct Ctx { llvm::SmallVector syntheticGlobals; llvm::SmallVector syntheticTables; + // linker-generated symbols + struct WasmSym { +// __global_base +// Symbol marking the start of the global section. +DefinedData *globalBase; + +// __stack_pointer/__stack_low/__stack_high +// Global that holds current value of stack pointer and data symbols marking +// the start and end of the stack region. stackPointer is initialized to +// stackHigh and grows downwards towards stackLow +GlobalSymbol *stackPointer; +DefinedData *stackLow; +DefinedData *stackHigh; + +// __tls_base +// Global that holds the address of the base of the current thread's +// TLS block. +GlobalSymbol *tlsBase; + +// __tls_size +// Symbol whose value is the size of the TLS block. +GlobalSymbol *tlsSize; + +// __tls_size +// Symbol whose value is the alignment of the TLS block. +GlobalSymbol *tlsAlign; + +// __data_end +// Symbol marking the end of the data and bss. +DefinedData *dataEnd; + +// __heap_base/__heap_end +// Symbols marking the beginning and end of the "heap". It starts at the end +// of the data, bss and explicit stack, and extends to the end of the linear +// memory allocated by wasm-ld. This region of memory is not used by the +// linked code, so it may be used as a backing store for `sbrk` or `malloc` +// implementations. +DefinedData *heapBase; +DefinedData *heapEnd; + +// __wasm_init_memory_flag +// Symbol whose contents are nonzero iff memory has already been +// initialized. +DefinedData *initMemoryFlag; + +// __wasm_init_memory +// Function that initializes passive data segments during instantiation. +DefinedFunction *initMemory; + +// __wasm_call_ctors +// Function that directly calls all ctors in priority order. +DefinedFunction *callCtors; + +// __wasm_call_dtors +// Function that calls the libc/etc. cleanup function. 
+DefinedFunction *callDtors; + +// __wasm_apply_global_relocs +// Function that applies relocations to wasm globals post-instantiation. +// Unlike __wasm_apply_data_relocs this needs to run on every thread. +DefinedFunction *applyGlobalRelocs; + +// __wasm_apply_tls_relocs +// Like __wasm_apply_data_relocs but for TLS section. These must be +// delayed until __wasm_init_tls. +DefinedFunction *applyTLSRelocs; + +// __wasm_apply_global_tls_relocs +// Like applyGlobalRelocs but for globals that hold TLS addresses. These +// must be delayed until __wasm_init_tls. +DefinedFunction *applyGlobalTLSRelocs; + +// __wasm_init_tls +// Function that allocates thread-local storage and initializes it. +DefinedFunction *initTLS; + +// Pointer to the function
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/137620 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 72ad9be - [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930)
Author: Losy001 Date: 2025-05-10T13:05:55-07:00 New Revision: 72ad9be1e337f487c9db4dd634005d09f7bf2790 URL: https://github.com/llvm/llvm-project/commit/72ad9be1e337f487c9db4dd634005d09f7bf2790 DIFF: https://github.com/llvm/llvm-project/commit/72ad9be1e337f487c9db4dd634005d09f7bf2790.diff LOG: [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930) This pull request implements mangling for ConstantMatrixType, allowing matrices to be used on Windows. Related issues: #53158, #127127 This example code: ```cpp #include #include typedef float Matrix4 __attribute__((matrix_type(4, 4))); int main() { printf("%s\n", typeid(Matrix4).name()); } ``` Outputs this: ``` struct __clang::__matrix ``` (cherry picked from commit f5a30f111dc4ad6422863722eb708059a68a9d5c) Added: clang/test/CodeGenCXX/mangle-ms-matrix.cpp Modified: clang/lib/AST/MicrosoftMangle.cpp Removed: diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 42b735ccf4a2c..74c995f2f97f0 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3552,7 +3552,22 @@ void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T, void MicrosoftCXXNameMangler::mangleType(const ConstantMatrixType *T, Qualifiers quals, SourceRange Range) { - Error(Range.getBegin(), "matrix type") << Range; + QualType EltTy = T->getElementType(); + const BuiltinType *ET = EltTy->getAs(); + + llvm::SmallString<64> TemplateMangling; + llvm::raw_svector_ostream Stream(TemplateMangling); + MicrosoftCXXNameMangler Extra(Context, Stream); + + Stream << "?$"; + + Extra.mangleSourceName("__matrix"); + Extra.mangleType(EltTy, Range, QMM_Escape); + + Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumRows())); + Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumColumns())); + + mangleArtificialTagType(TagTypeKind::Struct, TemplateMangling, {"__clang"}); } void MicrosoftCXXNameMangler::mangleType(const DependentSizedMatrixType 
*T, diff --git a/clang/test/CodeGenCXX/mangle-ms-matrix.cpp b/clang/test/CodeGenCXX/mangle-ms-matrix.cpp new file mode 100644 index 0..b244aa6e33cfa --- /dev/null +++ b/clang/test/CodeGenCXX/mangle-ms-matrix.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -fenable-matrix -fms-extensions -fcxx-exceptions -ffreestanding -target-feature +avx -emit-llvm %s -o - -triple=i686-pc-win32 | FileCheck %s +// RUN: %clang_cc1 -fenable-matrix -fms-extensions -fcxx-exceptions -ffreestanding -target-feature +avx -emit-llvm %s -o - -triple=i686-pc-win32 -fexperimental-new-constant-interpreter | FileCheck %s + +typedef float __attribute__((matrix_type(4, 4))) m4x4f; +typedef float __attribute__((matrix_type(2, 2))) m2x2f; + +typedef int __attribute__((matrix_type(4, 4))) m4x4i; +typedef int __attribute__((matrix_type(2, 2))) m2x2i; + +void thow(int i) { + switch (i) { +case 0: throw m4x4f(); +// CHECK: ??_R0U?$__matrix@M$03$03@__clang@@@8 +// CHECK: _CT??_R0U?$__matrix@M$03$03@__clang@@@864 +// CHECK: _CTA1U?$__matrix@M$03$03@__clang@@ +// CHECK: _TI1U?$__matrix@M$03$03@__clang@@ +case 1: throw m2x2f(); +// CHECK: ??_R0U?$__matrix@M$01$01@__clang@@@8 +// CHECK: _CT??_R0U?$__matrix@M$01$01@__clang@@@816 +// CHECK: _CTA1U?$__matrix@M$01$01@__clang@@ +// CHECK: _TI1U?$__matrix@M$01$01@__clang@@ +case 2: throw m4x4i(); +// CHECK: ??_R0U?$__matrix@H$03$03@__clang@@@8 +// CHECK: _CT??_R0U?$__matrix@H$03$03@__clang@@@864 +// CHECK: _CTA1U?$__matrix@H$03$03@__clang@@ +// CHECK: _TI1U?$__matrix@H$03$03@__clang@@ +case 3: throw m2x2i(); +// CHECK: ??_R0U?$__matrix@H$01$01@__clang@@@8 +// CHECK: _CT??_R0U?$__matrix@H$01$01@__clang@@@816 +// CHECK: _CTA1U?$__matrix@H$01$01@__clang@@ +// CHECK: _TI1U?$__matrix@H$01$01@__clang@@ + } +} + +void foo44f(m4x4f) {} +// CHECK: define dso_local void @"?foo44f@@YAXU?$__matrix@M$03$03@__clang@@@Z" + +m4x4f rfoo44f() { return m4x4f(); } +// CHECK: define dso_local noundef <16 x float> @"?rfoo44f@@YAU?$__matrix@M$03$03@__clang@@XZ" + +void foo22f(m2x2f) {} +// 
CHECK: define dso_local void @"?foo22f@@YAXU?$__matrix@M$01$01@__clang@@@Z" + +m2x2f rfoo22f() { return m2x2f(); } +// CHECK: define dso_local noundef <4 x float> @"?rfoo22f@@YAU?$__matrix@M$01$01@__clang@@XZ" + +void foo44i(m4x4i) {} +// CHECK: define dso_local void @"?foo44i@@YAXU?$__matrix@H$03$03@__clang@@@Z" + +m4x4i rfoo44i() { return m4x4i(); } +// CHECK: define dso_local noundef <16 x i32> @"?rfoo44i@@YAU?$__matrix@H$03$03@__clang@@XZ" + +void foo22i(m2x2i) {} +// CHECK: define dso_local void @"?foo22i@@YAXU?$__matrix@H$01$01@__clang@@@Z" + +m2x2i rfoo22i() { return m2x2i(); } +// CHECK: define dso_local noundef <4 x i32> @"?rfoo22i@@YAU?$__matrix@H$01$01@__clang@@XZ" \ No newline at end of file
[llvm-branch-commits] [clang] 41c36d9 - [clang] Fix unused variable warning in MS mangler from constant matrix patch
Author: Reid Kleckner Date: 2025-05-10T13:05:55-07:00 New Revision: 41c36d94080488cc938b1c1697c7e8353405cd75 URL: https://github.com/llvm/llvm-project/commit/41c36d94080488cc938b1c1697c7e8353405cd75 DIFF: https://github.com/llvm/llvm-project/commit/41c36d94080488cc938b1c1697c7e8353405cd75.diff LOG: [clang] Fix unused variable warning in MS mangler from constant matrix patch (cherry picked from commit ccdd55c518277d749eff878ffcb5ca3de55c2a60) Added: Modified: clang/lib/AST/MicrosoftMangle.cpp Removed: diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 74c995f2f97f0..cb35dbd611204 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3553,7 +3553,6 @@ void MicrosoftCXXNameMangler::mangleType(const DependentSizedExtVectorType *T, void MicrosoftCXXNameMangler::mangleType(const ConstantMatrixType *T, Qualifiers quals, SourceRange Range) { QualType EltTy = T->getElementType(); - const BuiltinType *ET = EltTy->getAs(); llvm::SmallString<64> TemplateMangling; llvm::raw_svector_ostream Stream(TemplateMangling); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [SelectionDAGBuilder] Use address width when lowering ptrtoaddr (PR #139423)
https://github.com/arichardson created https://github.com/llvm/llvm-project/pull/139423 Instead of just deferring to ptrtoint, we should truncate to the index width and then perform the ZextOrTrunc. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [SelectionDAGBuilder] Use address width when lowering ptrtoaddr (PR #139423)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-selectiondag Author: Alexander Richardson (arichardson) Changes Instead of just deferring to ptrtoint, we should truncate to the index width and then perform the ZextOrTrunc. --- Full diff: https://github.com/llvm/llvm-project/pull/139423.diff 2 Files Affected: - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+14-1) - (modified) llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll (+5-1) ``diff diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e6651d000bd71..806bab5379bde 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3878,7 +3878,20 @@ void SelectionDAGBuilder::visitSIToFP(const User &I) { } void SelectionDAGBuilder::visitPtrToAddr(const User &I) { - visitPtrToInt(I); + const auto &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &Ctx = *DAG.getContext(); + // ptrtoaddr is equivalent to a truncate of ptrtoint to address/index width + SDValue N = getValue(I.getOperand(0)); + Type *PtrTy = I.getOperand(0)->getType(); + EVT AddrVT = EVT::getIntegerVT(Ctx, DL.getPointerAddressSizeInBits(PtrTy)); + if (auto *VTy = dyn_cast(PtrTy)) { +Type *EltTy = VTy->getElementType(); +AddrVT = EVT::getVectorVT(Ctx, AddrVT, VTy->getElementCount()); + } + N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), AddrVT); + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), TLI.getValueType(DL, I.getType())); + setValue(&I, N); } void SelectionDAGBuilder::visitPtrToInt(const User &I) { diff --git a/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll b/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll index 32b5d9441b61c..da4b531ab5b25 100644 --- a/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll +++ b/llvm/test/CodeGen/AMDGPU/ptrtoint-ptrtoaddr-p8.ll @@ -32,8 +32,9 @@ define <2 x i64> @ptrtoaddr_vec(<2 x ptr 
addrspace(8)> %ptr) { ; CHECK-LABEL: ptrtoaddr_vec: ; CHECK: ; %bb.0: ; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT:v_mov_b32_e32 v3, v5 ; CHECK-NEXT:v_mov_b32_e32 v2, v4 +; CHECK-NEXT:v_and_b32_e32 v1, 0x, v1 +; CHECK-NEXT:v_and_b32_e32 v3, 0x, v5 ; CHECK-NEXT:s_setpc_b64 s[30:31] %ret = ptrtoaddr <2 x ptr addrspace(8)> %ptr to <2 x i64> ret <2 x i64> %ret @@ -57,6 +58,9 @@ define i128 @ptrtoaddr_ext(ptr addrspace(8) %ptr) { ; CHECK-LABEL: ptrtoaddr_ext: ; CHECK: ; %bb.0: ; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT:v_and_b32_e32 v1, 0x, v1 +; CHECK-NEXT:v_mov_b32_e32 v2, 0 +; CHECK-NEXT:v_mov_b32_e32 v3, 0 ; CHECK-NEXT:s_setpc_b64 s[30:31] %ret = ptrtoaddr ptr addrspace(8) %ptr to i128 ret i128 %ret `` https://github.com/llvm/llvm-project/pull/139423 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAGBuilder] Use address width when lowering ptrtoaddr (PR #139423)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/139423 >From d4520dc202a500c80309cf09517b2a2823bf13ab Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Sat, 10 May 2025 17:33:01 -0700 Subject: [PATCH] remove unused variable Created using spr 1.3.6-beta.1 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 806bab5379bde..66b11030ce5a5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3885,10 +3885,8 @@ void SelectionDAGBuilder::visitPtrToAddr(const User &I) { SDValue N = getValue(I.getOperand(0)); Type *PtrTy = I.getOperand(0)->getType(); EVT AddrVT = EVT::getIntegerVT(Ctx, DL.getPointerAddressSizeInBits(PtrTy)); - if (auto *VTy = dyn_cast(PtrTy)) { -Type *EltTy = VTy->getElementType(); + if (auto *VTy = dyn_cast(PtrTy)) AddrVT = EVT::getVectorVT(Ctx, AddrVT, VTy->getElementCount()); - } N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), AddrVT); N = DAG.getZExtOrTrunc(N, getCurSDLoc(), TLI.getValueType(DL, I.getType())); setValue(&I, N); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR] Introduce the `ptrtoaddr` instruction (PR #139357)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/139357 >From 25dc175562349410f161ef0e80246301d9a7ba79 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 9 May 2025 22:43:37 -0700 Subject: [PATCH] fix docs build Created using spr 1.3.6-beta.1 --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2d18d0d97aaee..38be6918ff73c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12435,7 +12435,7 @@ Example: .. _i_ptrtoaddr: '``ptrtoaddr .. to``' Instruction - +^ Syntax: """ ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
llvmbot wrote: @llvm/pr-subscribers-llvm-selectiondag Author: Alexander Richardson (arichardson) Changes Of the 128 bits of the buffer descriptor, only 48 bits are address bits, so following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54, the logical conclusion is to set the index width to 48 bits instead of the current value of 128. Most of the test changes are mechanical datalayout updates, but there is one actual change: the ptrmask test now uses .i48 instead of .i128 and I had to update SelectionDAGBuilder to correctly extend the mask. --- Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139419.diff 34 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) - (modified) clang/test/CodeGen/target-data.c (+2-2) - (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) - (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) - (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll (+1-1) - (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) - (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll (+1-1) - (modified) 
llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll (+1-1) - (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) - (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) - (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) - (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) - (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) ``diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index c368200f3f739..056a3d6579fa5 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" -"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" -"32-v48:64-v96:128" -"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" -"-ni:7:8:9"; +"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" 
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 9cb00e8ee73d3..41a3f59b0fc81 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v3
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir Author: Alexander Richardson (arichardson) Changes Of the 128 bits of the buffer descriptor, only 48 bits are address bits, so following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54, the logical conclusion is to set the index width to 48 bits instead of the current value of 128. Most of the test changes are mechanical datalayout updates, but there is one actual change: the ptrmask test now uses .i48 instead of .i128 and I had to update SelectionDAGBuilder to correctly extend the mask. --- Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139419.diff 34 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) - (modified) clang/test/CodeGen/target-data.c (+2-2) - (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) - (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) - (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll (+1-1) - (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) - (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll (+1-1) - (modified) 
llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll (+1-1) - (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) - (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) - (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) - (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) - (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) ``diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index c368200f3f739..056a3d6579fa5 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" -"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" -"32-v48:64-v96:128" -"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" -"-ni:7:8:9"; +"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" 
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 9cb00e8ee73d3..41a3f59b0fc81 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:6
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
llvmbot wrote: @llvm/pr-subscribers-clang @llvm/pr-subscribers-llvm-transforms Author: Alexander Richardson (arichardson) Changes Of the 128 bits of the buffer descriptor, only 48 bits are address bits, so following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54, the logical conclusion is to set the index width to 48 bits instead of the current value of 128. Most of the test changes are mechanical datalayout updates, but there is one actual change: the ptrmask test now uses .i48 instead of .i128 and I had to update SelectionDAGBuilder to correctly extend the mask. --- Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139419.diff 34 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) - (modified) clang/test/CodeGen/target-data.c (+2-2) - (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) - (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) - (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll (+1-1) - (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) - (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll (+1-1) - (modified) 
llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll (+1-1) - (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) - (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) - (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) - (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) - (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) ``diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index c368200f3f739..056a3d6579fa5 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" -"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" -"32-v48:64-v96:128" -"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" -"-ni:7:8:9"; +"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" 
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 9cb00e8ee73d3..41a3f59b0fc81 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:3
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Alexander Richardson (arichardson) Changes Of the 128 bits of the buffer descriptor, only 48 bits are address bits, so following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54, the logical conclusion is to set the index width to 48 bits instead of the current value of 128. Most of the test changes are mechanical datalayout updates, but there is one actual change: the ptrmask test now uses .i48 instead of .i128 and I had to update SelectionDAGBuilder to correctly extend the mask. --- Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139419.diff 34 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) - (modified) clang/test/CodeGen/target-data.c (+2-2) - (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) - (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) - (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll (+1-1) - (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) - (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll (+1-1) - (modified) 
llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll (+1-1) - (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) - (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) - (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) - (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) - (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) ``diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index c368200f3f739..056a3d6579fa5 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" -"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" -"32-v48:64-v96:128" -"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" -"-ni:7:8:9"; +"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" 
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 9cb00e8ee73d3..41a3f59b0fc81 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:3
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
llvmbot wrote: @llvm/pr-subscribers-llvm-globalisel Author: Alexander Richardson (arichardson) Changes Of the 128-bits of buffer descriptor only 48 bits are address bits, so following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54, the logic conclusion is to set the index width to 48 bits instead of the current value of 128. Most of the test changes are mechanical datalayout updates, but there is one actual change: the ptrmask test now uses .i48 instead of .i128 and I had to update SelectionDAGBuilder to correctly extend the mask. --- Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139419.diff 34 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) - (modified) clang/test/CodeGen/target-data.c (+2-2) - (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) - (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) - (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll (+1-1) - (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) - (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll (+1-1) - (modified) 
llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll (+1-1) - (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) - (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) - (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) - (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) - (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) ``diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index c368200f3f739..056a3d6579fa5 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" -"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" -"32-v48:64-v96:128" -"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" -"-ni:7:8:9"; +"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" 
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 9cb00e8ee73d3..41a3f59b0fc81 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
llvmbot wrote: @llvm/pr-subscribers-flang-openmp Author: Alexander Richardson (arichardson) Changes Of the 128-bits of buffer descriptor only 48 bits are address bits, so following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54, the logic conclusion is to set the index width to 48 bits instead of the current value of 128. Most of the test changes are mechanical datalayout updates, but there is one actual change: the ptrmask test now uses .i48 instead of .i128 and I had to update SelectionDAGBuilder to correctly extend the mask. --- Full diff: https://github.com/llvm/llvm-project/pull/139419.diff 34 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) - (modified) clang/test/CodeGen/target-data.c (+2-2) - (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) - (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) - (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll (+1-1) - (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) - (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) - (modified) 
llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll (+1-1) - (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) - (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) - (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) - (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) - (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) ``diff Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and 
(-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again;
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Alexander Richardson (arichardson) Changes Of the 128-bits of buffer descriptor only 48 bits are address bits, so following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54, the logic conclusion is to set the index width to 48 bits instead of the current value of 128. Most of the test changes are mechanical datalayout updates, but there is one actual change: the ptrmask test now uses .i48 instead of .i128 and I had to update SelectionDAGBuilder to correctly extend the mask. --- Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139419.diff 34 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) - (modified) clang/test/CodeGen/target-data.c (+2-2) - (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) - (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) - (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll (+1-1) - (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) - (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll (+1-1) - (modified) 
llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll (+1-1) - (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) - (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) - (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) - (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) - (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) ``diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index c368200f3f739..056a3d6579fa5 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" -"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" -"32-v48:64-v96:128" -"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" -"-ni:7:8:9"; +"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" 
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 9cb00e8ee73d3..41a3f59b0fc81 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
@@ -7970,17 +7970,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // On arm64_32, pointers are 32 bits when stored in memory, but // zero-extended to 64 bits when in registers. Thus the mask is 32 bits to -// match the index type, but the pointer is 64 bits, so the the mask must be +// match the index type, but the pointer is 64 bits, so the mask must be // zero-extended up to 64 bits to match the pointer. EVT PtrVT = TLI.getValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType()); assert(PtrVT == Ptr.getValueType()); -assert(MemVT == Mask.getValueType()); -if (MemVT != PtrVT) +if (Mask.getValueType().getFixedSizeInBits() < MemVT.getFixedSizeInBits()) { + // For AMDGPU buffer descriptors the mask is 48 bits, but the pointer is + // 128-bit, so we have to pad the mask with ones for unused bits. + auto HighOnes = + DAG.getNode(ISD::SHL, sdl, PtrVT, DAG.getAllOnesConstant(sdl, PtrVT), + DAG.getConstant(Mask.getValueType().getFixedSizeInBits(), + sdl, PtrVT)); + Mask = DAG.getNode(ISD::OR, sdl, PtrVT, + DAG.getZExtOrTrunc(Mask, sdl, PtrVT), HighOnes); arichardson wrote: Not sure if there is an easier way to one-pad the mask argument but this _should_ do the right thing. https://github.com/llvm/llvm-project/pull/139419 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
https://github.com/arichardson created https://github.com/llvm/llvm-project/pull/139419 Of the 128 bits of the buffer descriptor, only 48 bits are address bits, so following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54, the logical conclusion is to set the index width to 48 bits instead of the current value of 128. Most of the test changes are mechanical datalayout updates, but there is one actual change: the ptrmask test now uses .i48 instead of .i128 and I had to update SelectionDAGBuilder to correctly extend the mask. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/139413 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/139413 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
@@ -145,79 +145,79 @@ define amdgpu_ps ptr addrspace(7) @s_ptrmask_buffer_fat_ptr_i32_neg8(ptr addrspa ret ptr addrspace(7) %masked } -define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) %ptr, i128 %mask) { -; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128: +define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i48(ptr addrspace(8) %ptr, i48 %mask) { +; GCN-LABEL: v_ptrmask_buffer_resource_variable_i48: ; GCN: ; %bb.0: ; GCN-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT:v_or_b32_e32 v5, 0x, v5 ; GCN-NEXT:v_and_b32_e32 v1, v1, v5 ; GCN-NEXT:v_and_b32_e32 v0, v0, v4 -; GCN-NEXT:v_and_b32_e32 v3, v3, v7 -; GCN-NEXT:v_and_b32_e32 v2, v2, v6 ; GCN-NEXT:s_setpc_b64 s[30:31] ; -; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128: +; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i48: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT:v_or_b32_e32 v5, 0x, v5 ; GFX10PLUS-NEXT:v_and_b32_e32 v0, v0, v4 ; GFX10PLUS-NEXT:v_and_b32_e32 v1, v1, v5 -; GFX10PLUS-NEXT:v_and_b32_e32 v2, v2, v6 -; GFX10PLUS-NEXT:v_and_b32_e32 v3, v3, v7 ; GFX10PLUS-NEXT:s_setpc_b64 s[30:31] - %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask) + %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 %mask) ret ptr addrspace(8) %masked } -define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) %ptr) { -; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8: +define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i48_neg8(ptr addrspace(8) %ptr) { +; GCN-LABEL: v_ptrmask_buffer_resource_variable_i48_neg8: ; GCN: ; %bb.0: ; GCN-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT:v_and_b32_e32 v0, -8, v0 ; GCN-NEXT:s_setpc_b64 s[30:31] ; -; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8: +; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i48_neg8: ; GFX10PLUS: ; %bb.0: ; 
GFX10PLUS-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10PLUS-NEXT:v_and_b32_e32 v0, -8, v0 ; GFX10PLUS-NEXT:s_setpc_b64 s[30:31] - %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8) + %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i48(ptr addrspace(8) %ptr, i48 -8) ret ptr addrspace(8) %masked } -define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) inreg %ptr, i128 inreg %mask) { -; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128: +define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i48(ptr addrspace(8) inreg %ptr, i48 inreg %mask) { +; GCN-LABEL: s_ptrmask_buffer_resource_variable_i48: ; GCN: ; %bb.0: -; GCN-NEXT:s_and_b64 s[4:5], s[4:5], s[8:9] +; GCN-NEXT:s_or_b32 s7, s7, 0x arichardson wrote: I can't read AMDGPU assembly properly so would be good to double-check that this is correct. https://github.com/llvm/llvm-project/pull/139419 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From 9fe563bd5347ede6a08e12202f23f49b20b7b64f Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The two components must be found by looking at the upper and lower half of EXTRACT_ELEMENT. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that unused elements can be removed. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 59 +-- llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++ 5 files changed, 90 insertions(+), 172 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 87b6914f8a0ee..ab8bb517e6ae4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1873,7 +1873,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2311,7 +2311,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bbf1b0fd590ef..38b22078c8c44 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12215,7 +12215,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12905,17 +12905,21 @@ std::pair SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->getAddressingMode() == ISD::PRE_INC) { -if (auto *C = dyn_cast(N->getOffset()))
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From e1eaeb6114c9b4a7b432d2655e699b2f7558e824 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic ` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 - llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 158 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index c376de877ac7d..70f59eafc6ecb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + auto *PtrTy = dyn_cast(I->getType()->getScalarType()); + auto *VTy = dyn_cast(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { +unsigned AS = PtrTy->getAddressSpace(); +Value *BC = Builder.CreateBitCast( +Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); +V = Builder.CreateIntToPtr(BC, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..eaa2ffd9b2731 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index f72970d12b6eb..d3027e799 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -382,6 +382,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; 
CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec4_i8: ; CHECK3: ## %bb.0: @@ -405,6 +420,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_ptr270: +; CHECK: ## %b
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From e1eaeb6114c9b4a7b432d2655e699b2f7558e824 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 - llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 158 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index c376de877ac7d..70f59eafc6ecb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's <n x ptr> vector type + auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType()); + auto *VTy = dyn_cast<VectorType>(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { +unsigned AS = PtrTy->getAddressSpace(); +Value *BC = Builder.CreateBitCast( +Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); +V = Builder.CreateIntToPtr(BC, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..eaa2ffd9b2731 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index f72970d12b6eb..d3027e799 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -382,6 +382,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; 
CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec4_i8: ; CHECK3: ## %bb.0: @@ -405,6 +420,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_ptr270: +; CHECK: ## %b
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From c8fe66e31bdd3bbf5beeb6096e8e4ddaba12dcf6 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 97 ++- llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++ 3 files changed, 153 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8eee7a4c61fe6..f88b4d5693979 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); 
break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -6014,6 +6017,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, + SDLoc dl, SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + assert(FirstVT == WidenVT); + return LdOp; +} + +static std::optional<EVT> findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector type that can load from. 
+ std::optional<EVT> FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) +return SDValue(); + + SmallVector MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT, + Chain, BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. + SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, + FirstVTWidth, dl, DAG); + + // Modified the chain - switch anything that used the old chain to use + // the new
[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From eda6b72faabf12c032b87358f75e1627fe10e221 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG] Split vector types for atomic load Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 37 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 209 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..d8f402f529632 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f88b4d5693979..a3b30943c8e7d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, Hi); 
break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi); +break; case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; @@ -1421,6 +1424,40 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(LD->getExtensionType() == ISD::NON_EXTLOAD && + "Extended load during type legalization!"); + SDLoc dl(LD); + EVT VT = LD->getValueType(0); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + EVT MemIntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); + SDValue ALD = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, MemIntVT, IntVT, Ch, + Ptr, LD->getMemOperand()); + + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + SDValue ExtractLo = DAG.getNode(ISD::TRUNCATE, dl, LoIntVT, ALD); + SDValue ExtractHi = + DAG.getNode(ISD::SRL, dl, IntVT, ALD, + DAG.getIntPtrConstant(VT.getSizeInBits() / 2, dl)); + ExtractHi = DAG.getNode(ISD::TRUNCATE, dl, HiIntVT, ExtractHi); + + Lo = DAG.getBitcast(LoVT, ExtractLo); + Hi = DAG.getBitcast(HiVT, ExtractHi); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1)); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 3cf9e3c1a8dfa..6e2e9d4b21891 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -205,6 +205,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From 63a3178d7d13c697e81900def3e706e450ef3437 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6..39e9fdfa5e62b 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; 
CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq 
%rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From 8671aa64d3d17d769e3d0ae57ffb38dda7176193 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d0b69b88748a9..8eee7a4c61fe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -458,6 +461,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x 
acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_ve
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From 9fe563bd5347ede6a08e12202f23f49b20b7b64f Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The two components must be found by looking at the upper and lower half of EXTRACT_ELEMENT. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that unused elements can be removed. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 59 +-- llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++ 5 files changed, 90 insertions(+), 172 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 87b6914f8a0ee..ab8bb517e6ae4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1873,7 +1873,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2311,7 +2311,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bbf1b0fd590ef..38b22078c8c44 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12215,7 +12215,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12905,17 +12905,21 @@ std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast<LoadSDNode>(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast<LoadSDNode>(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template <typename T> +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->getAddressingMode() == ISD::PRE_INC) { -if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From b24b74b999f530c75e41747bbb55736276b38852 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9f75fe8803cda..dfce56653af01 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc8..6efcbb80c0ce6 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw 
(%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From 9fe563bd5347ede6a08e12202f23f49b20b7b64f Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The two components must be found by looking at the upper and lower half of EXTRACT_ELEMENT. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that unused elements can be removed. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 59 +-- llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++ 5 files changed, 90 insertions(+), 172 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 87b6914f8a0ee..ab8bb517e6ae4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1873,7 +1873,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2311,7 +2311,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bbf1b0fd590ef..38b22078c8c44 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12215,7 +12215,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12905,17 +12905,21 @@ std::pair SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->getAddressingMode() == ISD::PRE_INC) { -if (auto *C = dyn_cast(N->getOffset()))
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From c8fe66e31bdd3bbf5beeb6096e8e4ddaba12dcf6 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 97 ++- llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++ 3 files changed, 153 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8eee7a4c61fe6..f88b4d5693979 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); 
break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -6014,6 +6017,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, + SDLoc dl, SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + assert(FirstVT == WidenVT); + return LdOp; +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector type that can load from. 
+ std::optional FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) +return SDValue(); + + SmallVector MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT, + Chain, BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. + SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, + FirstVTWidth, dl, DAG); + + // Modified the chain - switch anything that used the old chain to use + // the new
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From c8fe66e31bdd3bbf5beeb6096e8e4ddaba12dcf6 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 97 ++- llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++ 3 files changed, 153 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8eee7a4c61fe6..f88b4d5693979 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); 
break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -6014,6 +6017,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, + SDLoc dl, SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + assert(FirstVT == WidenVT); + return LdOp; +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector type that can load from. 
+ std::optional FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) +return SDValue(); + + SmallVector MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT, + Chain, BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. + SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, + FirstVTWidth, dl, DAG); + + // Modified the chain - switch anything that used the old chain to use + // the new
[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From eda6b72faabf12c032b87358f75e1627fe10e221 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG] Split vector types for atomic load Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 37 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 209 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..d8f402f529632 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f88b4d5693979..a3b30943c8e7d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, Hi); 
break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N), Lo, Hi); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1421,6 +1424,40 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(LD->getExtensionType() == ISD::NON_EXTLOAD && + "Extended load during type legalization!"); + SDLoc dl(LD); + EVT VT = LD->getValueType(0); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + EVT MemIntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); + SDValue ALD = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, MemIntVT, IntVT, Ch, + Ptr, LD->getMemOperand()); + + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + SDValue ExtractLo = DAG.getNode(ISD::TRUNCATE, dl, LoIntVT, ALD); + SDValue ExtractHi = + DAG.getNode(ISD::SRL, dl, IntVT, ALD, + DAG.getIntPtrConstant(VT.getSizeInBits() / 2, dl)); + ExtractHi = DAG.getNode(ISD::TRUNCATE, dl, HiIntVT, ExtractHi); + + Lo = DAG.getBitcast(LoVT, ExtractLo); + Hi = DAG.getBitcast(HiVT, ExtractHi); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1)); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 3cf9e3c1a8dfa..6e2e9d4b21891 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -205,6 +205,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl
[llvm-branch-commits] [clang] release/20.x: [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930) (PR #138017)
github-actions[bot] wrote: @tstellar (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/138017 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930) (PR #138017)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/138017 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][MemRef] Use specialized index ops to fold expand/collapse_shape (PR #138930)
https://github.com/MaheshRavishankar approved this pull request. The changes look good to me. It isn't strictly required, but given that both of us work on the same downstream project, does this pass with the said downstream project? But this looks good to me. https://github.com/llvm/llvm-project/pull/138930 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)
https://github.com/arichardson created https://github.com/llvm/llvm-project/pull/139413 None ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Alexander Richardson (arichardson) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/139413.diff 2 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp (+17) - (modified) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll (+1-4) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index eb768ed9ad5a1..e5c8df0b162d8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -1361,6 +1361,7 @@ class SplitPtrStructs : public InstVisitor { PtrParts visitAtomicCmpXchgInst(AtomicCmpXchgInst &AI); PtrParts visitGetElementPtrInst(GetElementPtrInst &GEP); + PtrParts visitPtrToAddrInst(PtrToAddrInst &PA); PtrParts visitPtrToIntInst(PtrToIntInst &PI); PtrParts visitIntToPtrInst(IntToPtrInst &IP); PtrParts visitAddrSpaceCastInst(AddrSpaceCastInst &I); @@ -1952,6 +1953,22 @@ PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) { return {nullptr, nullptr}; } +PtrParts SplitPtrStructs::visitPtrToAddrInst(PtrToAddrInst &PA) { + Value *Ptr = PA.getPointerOperand(); + if (!isSplitFatPtr(Ptr->getType())) +return {nullptr, nullptr}; + IRB.SetInsertPoint(&PA); + + auto [Rsrc, Off] = getPtrParts(Ptr); + Value *Res = IRB.CreateIntCast(Off, PA.getType(), /*isSigned=*/false, + PA.getName() + ".off"); + copyMetadata(Res, &PA); + Res->takeName(&PA); + SplitUsers.insert(&PA); + PA.replaceAllUsesWith(Res); + return {nullptr, nullptr}; +} + PtrParts SplitPtrStructs::visitIntToPtrInst(IntToPtrInst &IP) { if (!isSplitFatPtr(IP.getType())) return {nullptr, nullptr}; diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll index 886f2238ac5ab..074c3cf7f3bbf 100644 --- 
a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll @@ -284,10 +284,7 @@ define i160 @ptrtoaddr_ext(ptr addrspace(7) %ptr) { ; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT:[[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 0 ; CHECK-NEXT:[[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 -; CHECK-NEXT:[[RET_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_RSRC]] to i160 -; CHECK-NEXT:[[TMP1:%.*]] = shl nuw i160 [[RET_RSRC]], 32 -; CHECK-NEXT:[[RET_OFF:%.*]] = zext i32 [[PTR_OFF]] to i160 -; CHECK-NEXT:[[RET:%.*]] = or i160 [[TMP1]], [[RET_OFF]] +; CHECK-NEXT:[[RET:%.*]] = zext i32 [[PTR_OFF]] to i160 ; CHECK-NEXT:ret i160 [[RET]] ; %ret = ptrtoaddr ptr addrspace(7) %ptr to i160 `` https://github.com/llvm/llvm-project/pull/139413 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPULowerBufferFatPointers] Handle ptrtoaddr by extending the offset (PR #139413)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/139413 >From a2dec95d11a68c7911eef777ad78b07aa747bef5 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Sat, 10 May 2025 15:35:50 -0700 Subject: [PATCH] remove fixme Created using spr 1.3.6-beta.1 --- .../test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll index 074c3cf7f3bbf..538145a11c733 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll @@ -278,7 +278,6 @@ define <2 x i32> @ptrtoaddr_vec(<2 x ptr addrspace(7)> %ptr) { } ;; Check that we extend the offset to i160 instead of reinterpreting all bits. -;; FIXME: this is not currently correct. define i160 @ptrtoaddr_ext(ptr addrspace(7) %ptr) { ; CHECK-LABEL: define i160 @ptrtoaddr_ext ; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]]) #[[ATTR0]] { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [Clang][MicrosoftMangle] Implement mangling for ConstantMatrixType (#134930) (PR #138017)
https://github.com/llvmbot updated https://github.com/llvm/llvm-project/pull/138017 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. https://support.github.com/contact";>Contact Support — https://githubstatus.com";>GitHub Status — https://twitter.com/githubstatus";>@githubstatus ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AMDGPU] Set AS8 address width to 48 bits (PR #139419)
llvmbot wrote: @llvm/pr-subscribers-mlir-llvm Author: Alexander Richardson (arichardson) Changes Of the 128 bits of the buffer descriptor only 48 bits are address bits, so following the discussion on https://discourse.llvm.org/t/clarifiying-the-semantics-of-ptrtoint/83987/54, the logical conclusion is to set the index width to 48 bits instead of the current value of 128. Most of the test changes are mechanical datalayout updates, but there is one actual change: the ptrmask test now uses .i48 instead of .i128 and I had to update SelectionDAGBuilder to correctly extend the mask. --- Patch is 47.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139419.diff 34 Files Affected: - (modified) clang/lib/Basic/Targets/AMDGPU.cpp (+3-4) - (modified) clang/test/CodeGen/target-data.c (+2-2) - (modified) clang/test/CodeGenOpenCL/amdgpu-env-amdgcn.cl (+1-1) - (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+12-3) - (modified) llvm/lib/IR/AutoUpgrade.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-4) - (modified) llvm/test/Analysis/StackSafetyAnalysis/extend-ptr.ll (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll (+2-3) - (modified) llvm/test/CodeGen/AMDGPU/ptrmask.ll (+23-23) - (modified) llvm/test/Transforms/AlignmentFromAssumptions/amdgpu-crash.ll (+1-1) - (modified) llvm/test/Transforms/EarlyCSE/AMDGPU/memrealtime.ll (+1-1) - (modified) llvm/test/Transforms/FunctionAttrs/make-buffer-rsrc.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll (+1-1) - (modified) llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/attributor_pointer_offset_crash.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/indirect_call_kernel_info_crash.ll (+1-1) - (modified) 
llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/spmdization_kernel_env_dep.ll (+1-1) - (modified) llvm/test/Transforms/OpenMP/values_in_offload_arrays.alloca.ll (+1-1) - (modified) llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp (+28-23) - (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+4-2) - (modified) llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp (+1-1) - (modified) mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp (+2-1) - (modified) mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir (+1-1) - (modified) mlir/test/Target/LLVMIR/omptarget-wsloop.mlir (+1-1) ``diff diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index c368200f3f739..056a3d6579fa5 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -33,10 +33,9 @@ static const char *const DataLayoutStringR600 = static const char *const DataLayoutStringAMDGCN = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32" -"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:" -"32-v48:64-v96:128" -"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1" -"-ni:7:8:9"; +"-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-" + "v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-" 
+"v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = { llvm::AMDGPUAS::FLAT_ADDRESS, // Default diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c index 9cb00e8ee73d3..41a3f59b0fc81 100644 --- a/clang/test/CodeGen/target-data.c +++ b/clang/test/CodeGen/target-data.c @@ -176,12 +176,12 @@ // RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \ // RUN: | FileCheck %s -check-prefix=R600SI -// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48
[llvm-branch-commits] [llvm] [IR] Introduce the `ptrtoaddr` instruction (PR #139357)
https://github.com/arichardson updated https://github.com/llvm/llvm-project/pull/139357 >From 25dc175562349410f161ef0e80246301d9a7ba79 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Fri, 9 May 2025 22:43:37 -0700 Subject: [PATCH] fix docs build Created using spr 1.3.6-beta.1 --- llvm/docs/LangRef.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 2d18d0d97aaee..38be6918ff73c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12435,7 +12435,7 @@ Example: .. _i_ptrtoaddr: '``ptrtoaddr .. to``' Instruction - +^ Syntax: """ ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/138635 >From e5413e4fe5a5a55587785e7711fda4e3c544c1c9 Mon Sep 17 00:00:00 2001 From: jofernau_amdeng Date: Tue, 6 May 2025 01:48:11 -0400 Subject: [PATCH] [X86] Remove extra MOV after widening atomic load This change adds patterns to optimize out an extra MOV present after widening the atomic load. commit-id:45989503 --- llvm/lib/Target/X86/X86InstrCompiler.td| 7 llvm/test/CodeGen/X86/atomic-load-store.ll | 40 -- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index efa1e8bd7f3e3..786d0567280f9 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1204,6 +1204,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (zext (i16 (atomic_load_16 addr:$src)), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> +def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src, + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> +def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, + (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> + // Floating point loads/stores. 
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 9ee8b4fc5ac7f..3cf9e3c1a8dfa 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -165,11 +165,15 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) { } define <2 x i16> @atomic_vec2_i16(ptr %x) { -; CHECK-LABEL: atomic_vec2_i16: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec2_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <2 x i16>, ptr %x acquire, align 4 ret <2 x i16> %ret } @@ -177,8 +181,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) { define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { ; CHECK-LABEL: atomic_vec2_ptr270: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8 ret <2 x ptr addrspace(270)> %ret @@ -187,8 +190,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { define <2 x i32> @atomic_vec2_i32_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_i32_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x i32>, ptr %x acquire, align 8 ret <2 x i32> %ret @@ -197,8 +199,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) { define <2 x float> @atomic_vec2_float_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_float_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq 
(%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x float>, ptr %x acquire, align 8 ret <2 x float> %ret @@ -354,11 +355,15 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { } define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { -; CHECK-LABEL: atomic_vec4_i8: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec4_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec4_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <4 x i8>, ptr %x acquire, align 4 ret <4 x i8> %ret } @@ -366,8 +371,7 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ; CHECK-LABEL: atomic_vec4_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <4 x i16>, ptr %x acquire, align 8 ret <4 x i16> %ret ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bi
[llvm-branch-commits] [llvm] [SPARC] Use op-then-halve instructions when we have VIS3 (PR #135718)
brad0 wrote: @arsenm https://github.com/llvm/llvm-project/pull/135718 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From bf8fc80f870022c2a42d01a500e2b16d648dd376 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The two components must be found by looking at the upper and lower half of EXTRACT_ELEMENT. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that unused elements can be removed. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 59 +-- llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++ 5 files changed, 90 insertions(+), 172 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 87b6914f8a0ee..ab8bb517e6ae4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1873,7 +1873,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2311,7 +2311,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bbf1b0fd590ef..38b22078c8c44 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12215,7 +12215,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12905,17 +12905,21 @@ std::pair SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->getAddressingMode() == ISD::PRE_INC) { -if (auto *C = dyn_cast(N->getOffset()))
[llvm-branch-commits] [libcxx] release/20.x: [libcxx] Provide locale conversions to tests through lit substitution (#105651) (PR #136449)
tstellar wrote: Do we still want to try to backport this one? https://github.com/llvm/llvm-project/pull/136449 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [LV] Fix crash when building partial reductions using types that aren't known scale factors (#136680) (PR #136863)
tstellar wrote: @NickGuy-Arm You can do the changes manually and create a new PR. https://github.com/llvm/llvm-project/pull/136863 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] release/20.x: [sanitizer_common] Remove interceptors for deprecated struct termio (#137403) (PR #137707)
tstellar wrote: @hpax Do you think we should backport this one anyway? https://github.com/llvm/llvm-project/pull/137707 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] release/20.x: [RISCV] Allow `Zicsr`/`Zifencei` to duplicate with `g` (#136842) (PR #137490)
https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/137490 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [release/20.x] Support z17 processor name and scheduler description (PR #135413)
https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/135413 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [release/20.x] Support z17 processor name and scheduler description (PR #135413)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/135413 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [release/20.x] Support z17 processor name and scheduler description (PR #135413)
github-actions[bot] wrote: @uweigand (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/135413 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 4370072 - [clang] Forward TPL of NestedNameSpecifier
Author: Jonas Hahnfeld Date: 2025-05-10T10:09:45-07:00 New Revision: 4370072022e5265d51b64182608e133277a24ac0 URL: https://github.com/llvm/llvm-project/commit/4370072022e5265d51b64182608e133277a24ac0 DIFF: https://github.com/llvm/llvm-project/commit/4370072022e5265d51b64182608e133277a24ac0.diff LOG: [clang] Forward TPL of NestedNameSpecifier This avoids type suffixes for integer constants when the type can be inferred from the template parameter, such as the unsigned parameter of A<1> and A<2> in the added test. Added: Modified: clang/lib/AST/NestedNameSpecifier.cpp clang/unittests/Tooling/QualTypeNamesTest.cpp Removed: diff --git a/clang/lib/AST/NestedNameSpecifier.cpp b/clang/lib/AST/NestedNameSpecifier.cpp index 76c77569da9fd..c043996f1ada3 100644 --- a/clang/lib/AST/NestedNameSpecifier.cpp +++ b/clang/lib/AST/NestedNameSpecifier.cpp @@ -283,13 +283,16 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, case TypeSpec: { const auto *Record = dyn_cast_or_null(getAsRecordDecl()); -if (ResolveTemplateArguments && Record) { +const TemplateParameterList *TPL = nullptr; +if (Record) { + TPL = Record->getSpecializedTemplate()->getTemplateParameters(); + if (ResolveTemplateArguments) { // Print the type trait with resolved template parameters. Record->printName(OS, Policy); -printTemplateArgumentList( -OS, Record->getTemplateArgs().asArray(), Policy, -Record->getSpecializedTemplate()->getTemplateParameters()); +printTemplateArgumentList(OS, Record->getTemplateArgs().asArray(), + Policy, TPL); break; + } } const Type *T = getAsType(); @@ -313,8 +316,8 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, TemplateName::Qualified::None); // Print the template argument list. 
- printTemplateArgumentList(OS, SpecType->template_arguments(), -InnerPolicy); + printTemplateArgumentList(OS, SpecType->template_arguments(), InnerPolicy, +TPL); } else if (const auto *DepSpecType = dyn_cast(T)) { // Print the template name without its corresponding @@ -322,7 +325,7 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, OS << DepSpecType->getIdentifier()->getName(); // Print the template argument list. printTemplateArgumentList(OS, DepSpecType->template_arguments(), -InnerPolicy); +InnerPolicy, TPL); } else { // Print the type normally QualType(T, 0).print(OS, InnerPolicy); diff --git a/clang/unittests/Tooling/QualTypeNamesTest.cpp b/clang/unittests/Tooling/QualTypeNamesTest.cpp index 5ded64d4fcc8c..49c40d633ad4b 100644 --- a/clang/unittests/Tooling/QualTypeNamesTest.cpp +++ b/clang/unittests/Tooling/QualTypeNamesTest.cpp @@ -265,6 +265,102 @@ TEST(QualTypeNameTest, InlineNamespace) { TypeNameVisitor::Lang_CXX11); } +TEST(QualTypeNameTest, TemplatedClass) { + std::unique_ptr AST = + tooling::buildASTFromCode("template struct A {\n" +" template struct B {};\n" +"};\n" +"template struct A<1>;\n" +"template struct A<2u>;\n" +"template struct A<1>::B<3>;\n" +"template struct A<2u>::B<4u>;\n"); + + auto &Context = AST->getASTContext(); + auto &Policy = Context.getPrintingPolicy(); + auto getFullyQualifiedName = [&](QualType QT) { +return TypeName::getFullyQualifiedName(QT, Context, Policy); + }; + + auto *A = Context.getTranslationUnitDecl() +->lookup(&Context.Idents.get("A")) +.find_first(); + ASSERT_NE(A, nullptr); + + // A has two explicit instantiations: A<1> and A<2u> + auto ASpec = A->spec_begin(); + ASSERT_NE(ASpec, A->spec_end()); + auto *A1 = *ASpec; + ASpec++; + ASSERT_NE(ASpec, A->spec_end()); + auto *A2 = *ASpec; + + // Their type names follow the records. 
+ QualType A1RecordTy = Context.getRecordType(A1); + EXPECT_EQ(getFullyQualifiedName(A1RecordTy), "A<1>"); + QualType A2RecordTy = Context.getRecordType(A2); + EXPECT_EQ(getFullyQualifiedName(A2RecordTy), "A<2U>"); + + // getTemplateSpecializationType() gives types that print the integral + // argument directly. + TemplateArgument Args1[] = { + {Context, llvm::APSInt::getUnsigned(1u), Context.UnsignedIntTy}}; + QualType A1TemplateSpecTy = + Context.getTemplateSpecializationType(TemplateName(A), Args1, A1RecordTy); + EXPECT_EQ(A1TemplateSpecTy.getAsString(),
[llvm-branch-commits] [clang] release/20.x: [clang] Forward TPL of NestedNameSpecifier (PR #137806)
https://github.com/tstellar closed https://github.com/llvm/llvm-project/pull/137806 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang] Forward TPL of NestedNameSpecifier (PR #137806)
https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/137806 Rate limit · GitHub body { background-color: #f6f8fa; color: #24292e; font-family: -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol; font-size: 14px; line-height: 1.5; margin: 0; } .container { margin: 50px auto; max-width: 600px; text-align: center; padding: 0 24px; } a { color: #0366d6; text-decoration: none; } a:hover { text-decoration: underline; } h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; text-shadow: 0 1px 0 #fff; } p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; } ul { list-style: none; margin: 25px 0; padding: 0; } li { display: table-cell; font-weight: bold; width: 1%; } .logo { display: inline-block; margin-top: 35px; } .logo-img-2x { display: none; } @media only screen and (-webkit-min-device-pixel-ratio: 2), only screen and ( min--moz-device-pixel-ratio: 2), only screen and ( -o-min-device-pixel-ratio: 2/1), only screen and (min-device-pixel-ratio: 2), only screen and (min-resolution: 192dpi), only screen and (min-resolution: 2dppx) { .logo-img-1x { display: none; } .logo-img-2x { display: inline-block; } } #suggestions { margin-top: 35px; color: #ccc; } #suggestions a { color: #66; font-weight: 200; font-size: 14px; margin: 0 10px; } Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From d02434d4ca3ec809d7a72862408bb569f4043939 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6..39e9fdfa5e62b 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; 
CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq 
%rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[
[llvm-branch-commits] [lld] release/20.x: [wasm-ld] Refactor WasmSym from static globals to per-link context (#134970) (PR #137620)
github-actions[bot] wrote: @anutosh491 (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. https://github.com/llvm/llvm-project/pull/137620 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][heatmap] Compute section utilization and partition score (PR #139193)
https://github.com/aaupov edited https://github.com/llvm/llvm-project/pull/139193 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][NFC] Disambiguate sample as basic/IP sample (PR #139350)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/139350 >From d32ce3f42af3da7ced12b0cc6b58e120deb83566 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 9 May 2025 21:13:27 -0700 Subject: [PATCH] rename data members Created using spr 1.3.4 --- bolt/include/bolt/Profile/DataReader.h | 6 +++--- bolt/lib/Profile/DataAggregator.cpp| 11 ++- bolt/lib/Profile/DataReader.cpp| 21 +++-- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h index b91efca085c8c..fbd1dd1e68792 100644 --- a/bolt/include/bolt/Profile/DataReader.h +++ b/bolt/include/bolt/Profile/DataReader.h @@ -390,7 +390,7 @@ class DataReader : public ProfileReaderBase { FuncMemData *getMemDataForNames(const std::vector &FuncNames); FuncBasicSampleData * - getFuncSampleData(const std::vector &FuncNames); + getFuncBasicSampleData(const std::vector &FuncNames); /// Return a vector of all FuncBranchData matching the list of names. 
/// Internally use fuzzy matching to match special names like LTO-generated @@ -433,7 +433,7 @@ class DataReader : public ProfileReaderBase { } using NamesToBranchesMapTy = std::map; - using NamesToSamplesMapTy = std::map; + using NamesToBasicSamplesMapTy = std::map; using NamesToMemEventsMapTy = std::map; using FuncsToBranchesMapTy = std::unordered_map; @@ -496,7 +496,7 @@ class DataReader : public ProfileReaderBase { unsigned Line{0}; unsigned Col{0}; NamesToBranchesMapTy NamesToBranches; - NamesToSamplesMapTy NamesToSamples; + NamesToBasicSamplesMapTy NamesToBasicSamples; NamesToMemEventsMapTy NamesToMemEvents; FuncsToBranchesMapTy FuncsToBranches; FuncsToMemDataMapTy FuncsToMemData; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 7a85297fe5f0e..88dea2cef4476 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -572,7 +572,8 @@ void DataAggregator::processProfile(BinaryContext &BC) { if (FuncBranchData *FBD = getBranchData(BF)) { BF.markProfiled(BinaryFunction::PF_LBR); BF.RawSampleCount = FBD->getNumExecutedBranches(); -} else if (FuncBasicSampleData *FSD = getFuncSampleData(BF.getNames())) { +} else if (FuncBasicSampleData *FSD = + getFuncBasicSampleData(BF.getNames())) { BF.markProfiled(BinaryFunction::PF_IP); BF.RawSampleCount = FSD->getSamples(); } @@ -644,11 +645,11 @@ bool DataAggregator::doBasicSample(BinaryFunction &OrigFunc, uint64_t Address, // Attach executed bytes to parent function in case of cold fragment. 
Func.SampleCountInBytes += Count * BlockSize; - auto I = NamesToSamples.find(Func.getOneName()); - if (I == NamesToSamples.end()) { + auto I = NamesToBasicSamples.find(Func.getOneName()); + if (I == NamesToBasicSamples.end()) { bool Success; StringRef LocName = getLocationName(Func, BAT); -std::tie(I, Success) = NamesToSamples.insert(std::make_pair( +std::tie(I, Success) = NamesToBasicSamples.insert(std::make_pair( Func.getOneName(), FuncBasicSampleData(LocName, FuncBasicSampleData::ContainerTy(; } @@ -2194,7 +2195,7 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { OutFile << " " << Entry.getKey(); OutFile << "\n"; -for (const auto &KV : NamesToSamples) { +for (const auto &KV : NamesToBasicSamples) { const FuncBasicSampleData &FSD = KV.second; for (const BasicSampleInfo &SI : FSD.Data) { writeLocation(SI.Loc); diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp index fda62e8c073ea..3376bef9d3fd7 100644 --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -562,7 +562,7 @@ float DataReader::evaluateProfileData(BinaryFunction &BF, } void DataReader::readBasicSampleData(BinaryFunction &BF) { - FuncBasicSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames()); + FuncBasicSampleData *SampleDataOrErr = getFuncBasicSampleData(BF.getNames()); if (!SampleDataOrErr) return; @@ -1090,10 +1090,10 @@ bool DataReader::hasMemData() { std::error_code DataReader::parseInNoLBRMode() { auto GetOrCreateFuncEntry = [&](StringRef Name) { -auto I = NamesToSamples.find(Name); -if (I == NamesToSamples.end()) { +auto I = NamesToBasicSamples.find(Name); +if (I == NamesToBasicSamples.end()) { bool Success; - std::tie(I, Success) = NamesToSamples.insert(std::make_pair( + std::tie(I, Success) = NamesToBasicSamples.insert(std::make_pair( Name, FuncBasicSampleData(Name, FuncBasicSampleData::ContainerTy(; assert(Success && "unexpected result of insert"); @@ -1142,8 +1142,8 @@ std::error_code 
DataReader::parseInNoLBRMode() { I->second.Data.emplace_back(std::move(MI)); } - for (auto &FuncSamples : Nam
[llvm-branch-commits] [llvm] [BOLT][NFC] Disambiguate sample as basic/IP sample (PR #139350)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/139350 >From d32ce3f42af3da7ced12b0cc6b58e120deb83566 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 9 May 2025 21:13:27 -0700 Subject: [PATCH] rename data members Created using spr 1.3.4 --- bolt/include/bolt/Profile/DataReader.h | 6 +++--- bolt/lib/Profile/DataAggregator.cpp| 11 ++- bolt/lib/Profile/DataReader.cpp| 21 +++-- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h index b91efca085c8c..fbd1dd1e68792 100644 --- a/bolt/include/bolt/Profile/DataReader.h +++ b/bolt/include/bolt/Profile/DataReader.h @@ -390,7 +390,7 @@ class DataReader : public ProfileReaderBase { FuncMemData *getMemDataForNames(const std::vector &FuncNames); FuncBasicSampleData * - getFuncSampleData(const std::vector &FuncNames); + getFuncBasicSampleData(const std::vector &FuncNames); /// Return a vector of all FuncBranchData matching the list of names. 
/// Internally use fuzzy matching to match special names like LTO-generated @@ -433,7 +433,7 @@ class DataReader : public ProfileReaderBase { } using NamesToBranchesMapTy = std::map; - using NamesToSamplesMapTy = std::map; + using NamesToBasicSamplesMapTy = std::map; using NamesToMemEventsMapTy = std::map; using FuncsToBranchesMapTy = std::unordered_map; @@ -496,7 +496,7 @@ class DataReader : public ProfileReaderBase { unsigned Line{0}; unsigned Col{0}; NamesToBranchesMapTy NamesToBranches; - NamesToSamplesMapTy NamesToSamples; + NamesToBasicSamplesMapTy NamesToBasicSamples; NamesToMemEventsMapTy NamesToMemEvents; FuncsToBranchesMapTy FuncsToBranches; FuncsToMemDataMapTy FuncsToMemData; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 7a85297fe5f0e..88dea2cef4476 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -572,7 +572,8 @@ void DataAggregator::processProfile(BinaryContext &BC) { if (FuncBranchData *FBD = getBranchData(BF)) { BF.markProfiled(BinaryFunction::PF_LBR); BF.RawSampleCount = FBD->getNumExecutedBranches(); -} else if (FuncBasicSampleData *FSD = getFuncSampleData(BF.getNames())) { +} else if (FuncBasicSampleData *FSD = + getFuncBasicSampleData(BF.getNames())) { BF.markProfiled(BinaryFunction::PF_IP); BF.RawSampleCount = FSD->getSamples(); } @@ -644,11 +645,11 @@ bool DataAggregator::doBasicSample(BinaryFunction &OrigFunc, uint64_t Address, // Attach executed bytes to parent function in case of cold fragment. 
Func.SampleCountInBytes += Count * BlockSize; - auto I = NamesToSamples.find(Func.getOneName()); - if (I == NamesToSamples.end()) { + auto I = NamesToBasicSamples.find(Func.getOneName()); + if (I == NamesToBasicSamples.end()) { bool Success; StringRef LocName = getLocationName(Func, BAT); -std::tie(I, Success) = NamesToSamples.insert(std::make_pair( +std::tie(I, Success) = NamesToBasicSamples.insert(std::make_pair( Func.getOneName(), FuncBasicSampleData(LocName, FuncBasicSampleData::ContainerTy()))); } @@ -2194,7 +2195,7 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const { OutFile << " " << Entry.getKey(); OutFile << "\n"; -for (const auto &KV : NamesToSamples) { +for (const auto &KV : NamesToBasicSamples) { const FuncBasicSampleData &FSD = KV.second; for (const BasicSampleInfo &SI : FSD.Data) { writeLocation(SI.Loc); diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp index fda62e8c073ea..3376bef9d3fd7 100644 --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -562,7 +562,7 @@ float DataReader::evaluateProfileData(BinaryFunction &BF, } void DataReader::readBasicSampleData(BinaryFunction &BF) { - FuncBasicSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames()); + FuncBasicSampleData *SampleDataOrErr = getFuncBasicSampleData(BF.getNames()); if (!SampleDataOrErr) return; @@ -1090,10 +1090,10 @@ bool DataReader::hasMemData() { std::error_code DataReader::parseInNoLBRMode() { auto GetOrCreateFuncEntry = [&](StringRef Name) { -auto I = NamesToSamples.find(Name); -if (I == NamesToSamples.end()) { +auto I = NamesToBasicSamples.find(Name); +if (I == NamesToBasicSamples.end()) { bool Success; - std::tie(I, Success) = NamesToSamples.insert(std::make_pair( + std::tie(I, Success) = NamesToBasicSamples.insert(std::make_pair( Name, FuncBasicSampleData(Name, FuncBasicSampleData::ContainerTy()))); assert(Success && "unexpected result of insert"); @@ -1142,8 +1142,8 @@ std::error_code 
DataReader::parseInNoLBRMode() { I->second.Data.emplace_back(std::move(MI)); } - for (auto &FuncSamples : Nam
[llvm-branch-commits] [clang] [clang-tools-extra] [llvm] [BOLT][heatmap] Use parsed basic/branch events (PR #136531)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/136531 Rate limit · GitHub — Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Build heatmap with pre-aggregated data (PR #138798)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/138798 Rate limit · GitHub — Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][heatmap] Compute section utilization and partition score (PR #139193)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/139193 >From 0d16d90e829f7ce753abb5e50eca8e46a80872dc Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 9 May 2025 14:48:04 -0700 Subject: [PATCH] update Created using spr 1.3.4 --- bolt/lib/Profile/Heatmap.cpp | 35 ++- bolt/test/X86/heatmap-preagg.test | 24 ++--- 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp index dd73f3a13a9d3..7b74c44427cb8 100644 --- a/bolt/lib/Profile/Heatmap.cpp +++ b/bolt/lib/Profile/Heatmap.cpp @@ -297,7 +297,7 @@ void Heatmap::printSectionHotness(StringRef FileName) const { void Heatmap::printSectionHotness(raw_ostream &OS) const { uint64_t NumTotalCounts = 0; StringMap SectionHotness; - StringMap SectionUtilization; + StringMap BucketUtilization; unsigned TextSectionIndex = 0; if (TextSections.empty()) @@ -312,42 +312,39 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const { UnmappedHotness += Frequency; }; - for (const auto [Bucket, Count] : Map) { -NumTotalCounts += Count; + for (const std::pair &KV : Map) { +NumTotalCounts += KV.second; // We map an address bucket to the first section (lowest address) // overlapping with that bucket. 
-auto Address = Bucket * BucketSize; +auto Address = KV.first * BucketSize; while (TextSectionIndex < TextSections.size() && Address >= TextSections[TextSectionIndex].EndAddress) TextSectionIndex++; if (TextSectionIndex >= TextSections.size() || Address + BucketSize < TextSections[TextSectionIndex].BeginAddress) { - RecordUnmappedBucket(Address, Count); + RecordUnmappedBucket(Address, KV.second); continue; } -StringRef Name = TextSections[TextSectionIndex].Name; -SectionHotness[Name] += Count; -++SectionUtilization[Name]; +SectionHotness[TextSections[TextSectionIndex].Name] += KV.second; +++BucketUtilization[TextSections[TextSectionIndex].Name]; } - auto getNumBuckets = [&](uint64_t Begin, uint64_t End) { -return End / BucketSize + !!(End % BucketSize) - Begin / BucketSize; - }; - assert(NumTotalCounts > 0 && "total number of heatmap buckets should be greater than 0"); OS << "Section Name, Begin Address, End Address, Percentage Hotness, " - << "Utilization Pct\n"; + << "Utilization Pct, Partition Score\n"; for (const auto [Name, Begin, End] : TextSections) { -const float RelHotness = 100. * SectionHotness[Name] / NumTotalCounts; -const float BucketUtilization = -100. * SectionUtilization[Name] / getNumBuckets(Begin, End); -OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}\n", Name, Begin, End, - RelHotness, BucketUtilization); +const float Hotness = 1. * SectionHotness[Name] / NumTotalCounts; +const uint64_t NumBuckets = +End / BucketSize + !!(End % BucketSize) - Begin / BucketSize; +const float Utilization = 1. * BucketUtilization[Name] / NumBuckets; +const float PartitionScore = Hotness * Utilization; +OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}, {5:f4}\n", Name, Begin, + End, 100. * Hotness, 100. 
* Utilization, PartitionScore); } if (UnmappedHotness > 0) -OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0\n", +OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0, 0\n", 100.0 * UnmappedHotness / NumTotalCounts); } } // namespace bolt diff --git a/bolt/test/X86/heatmap-preagg.test b/bolt/test/X86/heatmap-preagg.test index 660d37fd03cbe..48f1683892881 100644 --- a/bolt/test/X86/heatmap-preagg.test +++ b/bolt/test/X86/heatmap-preagg.test @@ -17,19 +17,19 @@ RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotn CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries CHECK-HEATMAP: HEATMAP: invalid traces: 1 -CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct -CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100. -CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667 -CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064 -CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0., 0. +CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct, Partition Score +CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100., 0.1685 +CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667, 0.0317 +CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064, 0.6671 +CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0., 0., 0. CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2 -CHECK-SEC-HOT-BAT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct -CHECK-SEC-HOT-BAT-NEXT: .init, 0x401000, 0x40101b, 17.2888, 10
[llvm-branch-commits] [llvm] [BOLT][heatmap] Compute section utilization and partition score (PR #139193)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/139193 >From 0d16d90e829f7ce753abb5e50eca8e46a80872dc Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 9 May 2025 14:48:04 -0700 Subject: [PATCH] update Created using spr 1.3.4 --- bolt/lib/Profile/Heatmap.cpp | 35 ++- bolt/test/X86/heatmap-preagg.test | 24 ++--- 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/bolt/lib/Profile/Heatmap.cpp b/bolt/lib/Profile/Heatmap.cpp index dd73f3a13a9d3..7b74c44427cb8 100644 --- a/bolt/lib/Profile/Heatmap.cpp +++ b/bolt/lib/Profile/Heatmap.cpp @@ -297,7 +297,7 @@ void Heatmap::printSectionHotness(StringRef FileName) const { void Heatmap::printSectionHotness(raw_ostream &OS) const { uint64_t NumTotalCounts = 0; StringMap SectionHotness; - StringMap SectionUtilization; + StringMap BucketUtilization; unsigned TextSectionIndex = 0; if (TextSections.empty()) @@ -312,42 +312,39 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const { UnmappedHotness += Frequency; }; - for (const auto [Bucket, Count] : Map) { -NumTotalCounts += Count; + for (const std::pair &KV : Map) { +NumTotalCounts += KV.second; // We map an address bucket to the first section (lowest address) // overlapping with that bucket. 
-auto Address = Bucket * BucketSize; +auto Address = KV.first * BucketSize; while (TextSectionIndex < TextSections.size() && Address >= TextSections[TextSectionIndex].EndAddress) TextSectionIndex++; if (TextSectionIndex >= TextSections.size() || Address + BucketSize < TextSections[TextSectionIndex].BeginAddress) { - RecordUnmappedBucket(Address, Count); + RecordUnmappedBucket(Address, KV.second); continue; } -StringRef Name = TextSections[TextSectionIndex].Name; -SectionHotness[Name] += Count; -++SectionUtilization[Name]; +SectionHotness[TextSections[TextSectionIndex].Name] += KV.second; +++BucketUtilization[TextSections[TextSectionIndex].Name]; } - auto getNumBuckets = [&](uint64_t Begin, uint64_t End) { -return End / BucketSize + !!(End % BucketSize) - Begin / BucketSize; - }; - assert(NumTotalCounts > 0 && "total number of heatmap buckets should be greater than 0"); OS << "Section Name, Begin Address, End Address, Percentage Hotness, " - << "Utilization Pct\n"; + << "Utilization Pct, Partition Score\n"; for (const auto [Name, Begin, End] : TextSections) { -const float RelHotness = 100. * SectionHotness[Name] / NumTotalCounts; -const float BucketUtilization = -100. * SectionUtilization[Name] / getNumBuckets(Begin, End); -OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}\n", Name, Begin, End, - RelHotness, BucketUtilization); +const float Hotness = 1. * SectionHotness[Name] / NumTotalCounts; +const uint64_t NumBuckets = +End / BucketSize + !!(End % BucketSize) - Begin / BucketSize; +const float Utilization = 1. * BucketUtilization[Name] / NumBuckets; +const float PartitionScore = Hotness * Utilization; +OS << formatv("{0}, {1:x}, {2:x}, {3:f4}, {4:f4}, {5:f4}\n", Name, Begin, + End, 100. * Hotness, 100. 
* Utilization, PartitionScore); } if (UnmappedHotness > 0) -OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0\n", +OS << formatv("[unmapped], 0x0, 0x0, {0:f4}, 0, 0\n", 100.0 * UnmappedHotness / NumTotalCounts); } } // namespace bolt diff --git a/bolt/test/X86/heatmap-preagg.test b/bolt/test/X86/heatmap-preagg.test index 660d37fd03cbe..48f1683892881 100644 --- a/bolt/test/X86/heatmap-preagg.test +++ b/bolt/test/X86/heatmap-preagg.test @@ -17,19 +17,19 @@ RUN: FileCheck %s --check-prefix CHECK-SEC-HOT-BAT --input-file %t2-section-hotn CHECK-HEATMAP: PERF2BOLT: read 81 aggregated LBR entries CHECK-HEATMAP: HEATMAP: invalid traces: 1 -CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct -CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100. -CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667 -CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064 -CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0., 0. +CHECK-SEC-HOT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct, Partition Score +CHECK-SEC-HOT-NEXT: .init, 0x401000, 0x40101b, 16.8545, 100., 0.1685 +CHECK-SEC-HOT-NEXT: .plt, 0x401020, 0x4010b0, 4.7583, 66.6667, 0.0317 +CHECK-SEC-HOT-NEXT: .text, 0x4010b0, 0x401c25, 78.3872, 85.1064, 0.6671 +CHECK-SEC-HOT-NEXT: .fini, 0x401c28, 0x401c35, 0., 0., 0. CHECK-HEATMAP-BAT: PERF2BOLT: read 79 aggregated LBR entries CHECK-HEATMAP-BAT: HEATMAP: invalid traces: 2 -CHECK-SEC-HOT-BAT: Section Name, Begin Address, End Address, Percentage Hotness, Utilization Pct -CHECK-SEC-HOT-BAT-NEXT: .init, 0x401000, 0x40101b, 17.2888, 10
[llvm-branch-commits] [BOLT] Print heatmap section scores in perf2bolt (PR #139194)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/139194 Rate limit · GitHub — Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Build heatmap with pre-aggregated data (PR #138798)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/138798 Rate limit · GitHub — Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Drop perf2bolt cold samples diagnostic (PR #139337)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/139337 Rate limit · GitHub — Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Drop perf2bolt cold samples diagnostic (PR #139337)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/139337 Rate limit · GitHub — Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Print heatmap section scores in perf2bolt (PR #139194)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/139194 Rate limit · GitHub — Whoa there! You have exceeded a secondary rate limit. Please wait a few minutes before you try again; in some cases this may take up to an hour. Contact Support (https://support.github.com/contact) — GitHub Status (https://githubstatus.com) — @githubstatus (https://twitter.com/githubstatus) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Build heatmap with pre-aggregated data (PR #138798)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/138798 >From f6b275f682c598d5c026efcbd348c6e8a35c759b Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 6 May 2025 20:09:58 -0700 Subject: [PATCH 1/2] keep parsing build-id Created using spr 1.3.4 --- bolt/lib/Profile/DataAggregator.cpp | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index c5b9696dc79d0..a5ac87ee781b2 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -450,6 +450,14 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, Error DataAggregator::preprocessProfile(BinaryContext &BC) { this->BC = &BC; + if (std::optional FileBuildID = BC.getFileBuildID()) { +outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; +processFileBuildID(*FileBuildID); + } else { +errs() << "BOLT-WARNING: build-id will not be checked because we could " + "not read one from input binary\n"; + } + auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) { errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf; exit(1); @@ -468,14 +476,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { goto heatmap; } - if (std::optional FileBuildID = BC.getFileBuildID()) { -outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; -processFileBuildID(*FileBuildID); - } else { -errs() << "BOLT-WARNING: build-id will not be checked because we could " - "not read one from input binary\n"; - } - if (BC.IsLinuxKernel) { // Current MMap parsing logic does not work with linux kernel. 
// MMap entries for linux kernel uses PERF_RECORD_MMAP >From 47f76b8dff0f639cac4b205de86224bfa50aa430 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 9 May 2025 15:01:28 -0700 Subject: [PATCH 2/2] simplify preprocessProfile Created using spr 1.3.4 --- bolt/lib/Profile/DataAggregator.cpp | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index c5b9696dc79d0..a259292bd0a29 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -518,13 +518,12 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { deleteTempFiles(); heatmap: - if (opts::HeatmapMode) { -if (std::error_code EC = printLBRHeatMap()) - return errorCodeToError(EC); -exit(0); - } + if (!opts::HeatmapMode) +return Error::success(); - return Error::success(); + if (std::error_code EC = printLBRHeatMap()) +return errorCodeToError(EC); + exit(0); } Error DataAggregator::readProfile(BinaryContext &BC) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Build heatmap with pre-aggregated data (PR #138798)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/138798 >From f6b275f682c598d5c026efcbd348c6e8a35c759b Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 6 May 2025 20:09:58 -0700 Subject: [PATCH 1/2] keep parsing build-id Created using spr 1.3.4 --- bolt/lib/Profile/DataAggregator.cpp | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index c5b9696dc79d0..a5ac87ee781b2 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -450,6 +450,14 @@ int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process, Error DataAggregator::preprocessProfile(BinaryContext &BC) { this->BC = &BC; + if (std::optional FileBuildID = BC.getFileBuildID()) { +outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; +processFileBuildID(*FileBuildID); + } else { +errs() << "BOLT-WARNING: build-id will not be checked because we could " + "not read one from input binary\n"; + } + auto ErrorCallback = [](int ReturnCode, StringRef ErrBuf) { errs() << "PERF-ERROR: return code " << ReturnCode << "\n" << ErrBuf; exit(1); @@ -468,14 +476,6 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { goto heatmap; } - if (std::optional FileBuildID = BC.getFileBuildID()) { -outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n"; -processFileBuildID(*FileBuildID); - } else { -errs() << "BOLT-WARNING: build-id will not be checked because we could " - "not read one from input binary\n"; - } - if (BC.IsLinuxKernel) { // Current MMap parsing logic does not work with linux kernel. 
// MMap entries for linux kernel uses PERF_RECORD_MMAP >From 47f76b8dff0f639cac4b205de86224bfa50aa430 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Fri, 9 May 2025 15:01:28 -0700 Subject: [PATCH 2/2] simplify preprocessProfile Created using spr 1.3.4 --- bolt/lib/Profile/DataAggregator.cpp | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index c5b9696dc79d0..a259292bd0a29 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -518,13 +518,12 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) { deleteTempFiles(); heatmap: - if (opts::HeatmapMode) { -if (std::error_code EC = printLBRHeatMap()) - return errorCodeToError(EC); -exit(0); - } + if (!opts::HeatmapMode) +return Error::success(); - return Error::success(); + if (std::error_code EC = printLBRHeatMap()) +return errorCodeToError(EC); + exit(0); } Error DataAggregator::readProfile(BinaryContext &BC) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From e9066eae746ce9eab372b247aea81e33accb2114 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 231b677c300a1..20d400c669693 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc8..6efcbb80c0ce6 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw 
(%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From bd488e460c7709c87e7a5bf82613dac9645b4b04 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 97 ++- llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++ 3 files changed, 153 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8eee7a4c61fe6..f88b4d5693979 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); 
break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -6014,6 +6017,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, + SDLoc dl, SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + assert(FirstVT == WidenVT); + return LdOp; +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector type that can load from. 
+ std::optional FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) +return SDValue(); + + SmallVector MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT, + Chain, BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. + SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, + FirstVTWidth, dl, DAG); + + // Modified the chain - switch anything that used the old chain to use + // the new
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120598 >From bd488e460c7709c87e7a5bf82613dac9645b4b04 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 11:19:39 -0500 Subject: [PATCH] [SelectionDAG] Widen <2 x T> vector types for atomic load Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. commit-id:2894ccd1 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 97 ++- llvm/test/CodeGen/X86/atomic-load-store.ll| 78 +++ 3 files changed, 153 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 89ea7ef4dbe89..bdfa5f7741ad3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1062,6 +1062,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8eee7a4c61fe6..f88b4d5693979 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4625,6 +4625,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +Res = WidenVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); 
break; case ISD::STEP_VECTOR: case ISD::SPLAT_VECTOR: @@ -6014,6 +6017,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, + SDLoc dl, SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + assert(FirstVT == WidenVT); + return LdOp; +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector type that can load from. 
+ std::optional FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, + WidthDiff.getKnownMinValue()); + + if (!FirstVT) +return SDValue(); + + SmallVector MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT, + Chain, BasePtr, LD->getMemOperand()); + + // Load the element with one instruction. + SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, + FirstVTWidth, dl, DAG); + + // Modified the chain - switch anything that used the old chain to use + // the new
[llvm-branch-commits] [llvm] [X86] Manage atomic load of fp -> int promotion in DAG (PR #120386)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120386 >From e9066eae746ce9eab372b247aea81e33accb2114 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:38:23 -0500 Subject: [PATCH] [X86] Manage atomic load of fp -> int promotion in DAG When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size. commit-id:f9d761c5 --- llvm/lib/Target/X86/X86ISelLowering.cpp| 4 +++ llvm/test/CodeGen/X86/atomic-load-store.ll | 37 ++ 2 files changed, 41 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 231b677c300a1..20d400c669693 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2653,6 +2653,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(Op, MVT::f32, Promote); } + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); + setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine({ISD::VECTOR_SHUFFLE, ISD::SCALAR_TO_VECTOR, diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index d23cfb89f9fc8..6efcbb80c0ce6 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -145,3 +145,40 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { %ret = load atomic <1 x i64>, ptr %x acquire, align 8 ret <1 x i64> %ret } + +define <1 x half> @atomic_vec1_half(ptr %x) { +; CHECK3-LABEL: atomic_vec1_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw 
(%rdi), %cx +; CHECK0-NEXT:## implicit-def: $eax +; CHECK0-NEXT:movw %cx, %ax +; CHECK0-NEXT:## implicit-def: $xmm0 +; CHECK0-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK0-NEXT:retq + %ret = load atomic <1 x half>, ptr %x acquire, align 2 + ret <1 x half> %ret +} + +define <1 x float> @atomic_vec1_float(ptr %x) { +; CHECK-LABEL: atomic_vec1_float: +; CHECK: ## %bb.0: +; CHECK-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT:retq + %ret = load atomic <1 x float>, ptr %x acquire, align 4 + ret <1 x float> %ret +} + +define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec1_double_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 8 + ret <1 x double> %ret +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG] Legalize <1 x T> vector types for atomic load (PR #120385)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120385 >From 6b14da3539e6040f2da1c49cf35c84ef0a2b840d Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:37:17 -0500 Subject: [PATCH] [SelectionDAG] Legalize <1 x T> vector types for atomic load `load atomic <1 x T>` is not valid. This change legalizes vector types of atomic load via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`. commit-id:5c36cc8c --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 15 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 121 +- 3 files changed, 135 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 720393158aa5e..89ea7ef4dbe89 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -874,6 +874,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d0b69b88748a9..8eee7a4c61fe6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -64,6 +64,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_UnaryOpWithExtraInput(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::ATOMIC_LOAD: +R = ScalarizeVecRes_ATOMIC_LOAD(cast(N)); +break; case ISD::LOAD: R = 
ScalarizeVecRes_LOAD(cast(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; @@ -458,6 +461,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { return Op; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) { + SDValue Result = DAG.getAtomicLoad( + ISD::NON_EXTLOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(), + N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(), + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 5bce4401f7bdb..d23cfb89f9fc8 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0 define void @test1(ptr %ptr, i32 %val1) { ; CHECK-LABEL: test1: @@ -28,3 +28,120 @@ define i32 @test3(ptr %ptr) { %val = load atomic i32, ptr %ptr seq_cst, align 4 ret i32 %val } + +define <1 x i32> @atomic_vec1_i32(ptr %x) { +; CHECK-LABEL: atomic_vec1_i32: +; CHECK: ## %bb.0: +; CHECK-NEXT:movl (%rdi), %eax +; CHECK-NEXT:retq + %ret = load atomic <1 x i32>, ptr %x 
acquire, align 4 + ret <1 x i32> %ret +} + +define <1 x i8> @atomic_vec1_i8(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzbl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movb (%rdi), %al +; CHECK0-NEXT:retq + %ret = load atomic <1 x i8>, ptr %x acquire, align 1 + ret <1 x i8> %ret +} + +define <1 x i16> @atomic_vec1_i16(ptr %x) { +; CHECK3-LABEL: atomic_vec1_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movzwl (%rdi), %eax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movw (%rdi), %ax +; CHECK0-NEXT:retq + %ret = load atomic <1 x i16>, ptr %x acquire, align 2 + ret <1 x i16> %ret +} + +define <1 x i32> @atomic_vec1_i8_zext(ptr %x) { +; CHECK3-LABEL: atomic_ve
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From ce64f048fb5324e5b2ddd0e7198e2fb400a62d8e Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 - llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 158 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index c376de877ac7d..70f59eafc6ecb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + auto *PtrTy = dyn_cast<PointerType>(I->getType()->getScalarType()); + auto *VTy = dyn_cast<VectorType>(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { +unsigned AS = PtrTy->getAddressSpace(); +Value *BC = Builder.CreateBitCast( +Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); +V = Builder.CreateIntToPtr(BC, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..eaa2ffd9b2731 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index f72970d12b6eb..d3027e799 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -382,6 +382,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; 
CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec4_i8: ; CHECK3: ## %bb.0: @@ -405,6 +420,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_ptr270: +; CHECK: ## %b
[llvm-branch-commits] [llvm] [X86] Add atomic vector tests for unaligned >1 sizes. (PR #120387)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120387 >From d02434d4ca3ec809d7a72862408bb569f4043939 Mon Sep 17 00:00:00 2001 From: jofrn Date: Wed, 18 Dec 2024 03:40:32 -0500 Subject: [PATCH] [X86] Add atomic vector tests for unaligned >1 sizes. Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6 --- llvm/test/CodeGen/X86/atomic-load-store.ll | 253 + 1 file changed, 253 insertions(+) diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 6efcbb80c0ce6..39e9fdfa5e62b 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind { ret <1 x i64> %ret } +define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_ptr: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_ptr: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} + define <1 x half> @atomic_vec1_half(ptr %x) { ; CHECK3-LABEL: atomic_vec1_half: ; CHECK3: ## %bb.0: @@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind { %ret = load atomic <1 x double>, ptr %x acquire, align 8 ret <1 x double> %ret } + +define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_i64: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; 
CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movq (%rsp), %rax +; CHECK3-NEXT:popq %rcx +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_i64: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq (%rsp), %rax +; CHECK0-NEXT:popq %rcx +; CHECK0-NEXT:retq + %ret = load atomic <1 x i64>, ptr %x acquire, align 4 + ret <1 x i64> %ret +} + +define <1 x double> @atomic_vec1_double(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec1_double: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec1_double: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq %rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <1 x double>, ptr %x acquire, align 4 + ret <1 x double> %ret +} + +define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { +; CHECK3-LABEL: atomic_vec2_i32: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:pushq %rax +; CHECK3-NEXT:movq %rdi, %rsi +; CHECK3-NEXT:movq %rsp, %rdx +; CHECK3-NEXT:movl $8, %edi +; CHECK3-NEXT:movl $2, %ecx +; CHECK3-NEXT:callq ___atomic_load +; CHECK3-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK3-NEXT:popq %rax +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i32: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:pushq %rax +; CHECK0-NEXT:movq %rdi, %rsi +; CHECK0-NEXT:movl $8, %edi +; CHECK0-NEXT:movq 
%rsp, %rdx +; CHECK0-NEXT:movl $2, %ecx +; CHECK0-NEXT:callq ___atomic_load +; CHECK0-NEXT:movq {{.*#+}} xmm0 = mem[0],zero +; CHECK0-NEXT:popq %rax +; CHECK0-NEXT:retq + %ret = load atomic <2 x i32>, ptr %x acquire, align 4 + ret <2 x i32> %ret +} + +define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_float_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[
[llvm-branch-commits] [llvm] [SelectionDAG] Split vector types for atomic load (PR #120640)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120640 >From 40b0a4ee9e008eeb749c49851bc52a66809a70a0 Mon Sep 17 00:00:00 2001 From: jofrn Date: Thu, 19 Dec 2024 16:25:55 -0500 Subject: [PATCH] [SelectionDAG] Split vector types for atomic load Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357 --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 37 llvm/test/CodeGen/X86/atomic-load-store.ll| 171 ++ 3 files changed, 209 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index bdfa5f7741ad3..d8f402f529632 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -960,6 +960,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f88b4d5693979..a3b30943c8e7d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1172,6 +1172,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_STEP_VECTOR(N, Lo, Hi); 
break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: +SplitVecRes_ATOMIC_LOAD(cast(N), Lo, Hi); +break; case ISD::LOAD: SplitVecRes_LOAD(cast(N), Lo, Hi); break; @@ -1421,6 +1424,40 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(LD->getExtensionType() == ISD::NON_EXTLOAD && + "Extended load during type legalization!"); + SDLoc dl(LD); + EVT VT = LD->getValueType(0); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + EVT MemIntVT = + EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits()); + SDValue ALD = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, MemIntVT, IntVT, Ch, + Ptr, LD->getMemOperand()); + + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + SDValue ExtractLo = DAG.getNode(ISD::TRUNCATE, dl, LoIntVT, ALD); + SDValue ExtractHi = + DAG.getNode(ISD::SRL, dl, IntVT, ALD, + DAG.getIntPtrConstant(VT.getSizeInBits() / 2, dl)); + ExtractHi = DAG.getNode(ISD::TRUNCATE, dl, HiIntVT, ExtractHi); + + Lo = DAG.getBitcast(LoVT, ExtractLo); + Hi = DAG.getBitcast(HiVT, ExtractHi); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1)); +} + void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 3cf9e3c1a8dfa..6e2e9d4b21891 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -205,6 +205,68 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) { ret <2 x float> %ret } +define <2 x half> @atomic_vec2_half(ptr %x) { +; CHECK3-LABEL: atomic_vec2_half: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movl (%rdi), %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm0 +; CHECK3-NEXT:shrl $16, %eax +; CHECK3-NEXT:pinsrw $0, %eax, %xmm1 +; CHECK3-NEXT:punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_half: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movl (%rdi), %eax +; CHECK0-NEXT:movl %eax, %ecx +; CHECK0-NEXT:shrl
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/120716 >From ce64f048fb5324e5b2ddd0e7198e2fb400a62d8e Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 20 Dec 2024 06:14:28 -0500 Subject: [PATCH] [AtomicExpand] Add bitcasts when expanding load atomic vector AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af --- llvm/lib/CodeGen/AtomicExpandPass.cpp | 15 - llvm/test/CodeGen/ARM/atomic-load-store.ll| 51 +++ llvm/test/CodeGen/X86/atomic-load-store.ll| 30 + .../X86/expand-atomic-non-integer.ll | 65 +++ 4 files changed, 158 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index c376de877ac7d..70f59eafc6ecb 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -2066,9 +2066,18 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall( I->replaceAllUsesWith(V); } else if (HasResult) { Value *V; -if (UseSizedLibcall) - V = Builder.CreateBitOrPointerCast(Result, I->getType()); -else { +if (UseSizedLibcall) { + // Add bitcasts from Result's scalar type to I's vector type + auto *PtrTy = dyn_cast(I->getType()->getScalarType()); + auto *VTy = dyn_cast(I->getType()); + if (VTy && PtrTy && !Result->getType()->isVectorTy()) { +unsigned AS = PtrTy->getAddressSpace(); +Value *BC = Builder.CreateBitCast( +Result, VTy->getWithNewType(DL.getIntPtrType(Ctx, AS))); +V = Builder.CreateIntToPtr(BC, I->getType()); + } else +V = Builder.CreateBitOrPointerCast(Result, I->getType()); +} else { V = Builder.CreateAlignedLoad(I->getType(), AllocaResult, AllocaAlignment); Builder.CreateLifetimeEnd(AllocaResult, SizeVal64); diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll index 560dfde356c29..eaa2ffd9b2731 100644 --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ 
b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) { store atomic double %val1, ptr %ptr seq_cst, align 8 ret void } + +define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 { +; ARM-LABEL: atomic_vec1_ptr: +; ARM: @ %bb.0: +; ARM-NEXT:ldr r0, [r0] +; ARM-NEXT:dmb ish +; ARM-NEXT:bx lr +; +; ARMOPTNONE-LABEL: atomic_vec1_ptr: +; ARMOPTNONE: @ %bb.0: +; ARMOPTNONE-NEXT:ldr r0, [r0] +; ARMOPTNONE-NEXT:dmb ish +; ARMOPTNONE-NEXT:bx lr +; +; THUMBTWO-LABEL: atomic_vec1_ptr: +; THUMBTWO: @ %bb.0: +; THUMBTWO-NEXT:ldr r0, [r0] +; THUMBTWO-NEXT:dmb ish +; THUMBTWO-NEXT:bx lr +; +; THUMBONE-LABEL: atomic_vec1_ptr: +; THUMBONE: @ %bb.0: +; THUMBONE-NEXT:push {r7, lr} +; THUMBONE-NEXT:movs r1, #0 +; THUMBONE-NEXT:mov r2, r1 +; THUMBONE-NEXT:bl __sync_val_compare_and_swap_4 +; THUMBONE-NEXT:pop {r7, pc} +; +; ARMV4-LABEL: atomic_vec1_ptr: +; ARMV4: @ %bb.0: +; ARMV4-NEXT:push {r11, lr} +; ARMV4-NEXT:mov r1, #2 +; ARMV4-NEXT:bl __atomic_load_4 +; ARMV4-NEXT:pop {r11, lr} +; ARMV4-NEXT:mov pc, lr +; +; ARMV6-LABEL: atomic_vec1_ptr: +; ARMV6: @ %bb.0: +; ARMV6-NEXT:ldr r0, [r0] +; ARMV6-NEXT:mov r1, #0 +; ARMV6-NEXT:mcr p15, #0, r1, c7, c10, #5 +; ARMV6-NEXT:bx lr +; +; THUMBM-LABEL: atomic_vec1_ptr: +; THUMBM: @ %bb.0: +; THUMBM-NEXT:ldr r0, [r0] +; THUMBM-NEXT:dmb sy +; THUMBM-NEXT:bx lr + %ret = load atomic <1 x ptr>, ptr %x acquire, align 4 + ret <1 x ptr> %ret +} diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index f72970d12b6eb..d3027e799 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -382,6 +382,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { ret <2 x i32> %ret } +define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec2_ptr_align: +; CHECK: ## %bb.0: +; CHECK-NEXT:pushq %rax +; CHECK-NEXT:movl $2, %esi +; CHECK-NEXT:callq ___atomic_load_16 +; 
CHECK-NEXT:movq %rdx, %xmm1 +; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT:popq %rax +; CHECK-NEXT:retq + %ret = load atomic <2 x ptr>, ptr %x acquire, align 16 + ret <2 x ptr> %ret +} + define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { ; CHECK3-LABEL: atomic_vec4_i8: ; CHECK3: ## %bb.0: @@ -405,6 +420,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ret <4 x i16> %ret } +define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind { +; CHECK-LABEL: atomic_vec4_ptr270: +; CHECK: ## %b
[llvm-branch-commits] [llvm] [SelectionDAG][X86] Remove unused elements from atomic vector. (PR #125432)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/125432 >From bf8fc80f870022c2a42d01a500e2b16d648dd376 Mon Sep 17 00:00:00 2001 From: jofrn Date: Fri, 31 Jan 2025 13:12:56 -0500 Subject: [PATCH] [SelectionDAG][X86] Remove unused elements from atomic vector. After splitting, all elements are created. The two components must be found by looking at the upper and lower half of EXTRACT_ELEMENT. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that unused elements can be removed. commit-id:b83937a8 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 ++- .../SelectionDAGAddressAnalysis.cpp | 30 ++-- llvm/lib/Target/X86/X86ISelLowering.cpp | 59 +-- llvm/test/CodeGen/X86/atomic-load-store.ll| 149 ++ 5 files changed, 90 insertions(+), 172 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 87b6914f8a0ee..ab8bb517e6ae4 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1873,7 +1873,7 @@ class SelectionDAG { /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. - SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); + SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -2311,7 +2311,7 @@ class SelectionDAG { /// merged. Check that both are nonvolatile and if LD is loading /// 'Bytes' bytes from a location that is 'Dist' units away from the /// location that the 'Base' load is loading from. 
- bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, + bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base, unsigned Bytes, int Dist) const; /// Infer alignment of a load / store address. Return std::nullopt if it diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index bbf1b0fd590ef..38b22078c8c44 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12215,7 +12215,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain, return TokenFactor; } -SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, +SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp) { assert(isa(NewMemOp.getNode()) && "Expected a memop node"); SDValue OldChain = SDValue(OldLoad, 1); @@ -12905,17 +12905,21 @@ std::pair SelectionDAG::UnrollVectorOverflowOp( getBuildVector(NewOvVT, dl, OvScalars)); } -bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, - LoadSDNode *Base, +bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD, + MemSDNode *Base, unsigned Bytes, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; - // TODO: probably too restrictive for atomics, revisit - if (!LD->isSimple()) -return false; - if (LD->isIndexed() || Base->isIndexed()) -return false; + if (auto Ld = dyn_cast(LD)) { +if (!Ld->isSimple()) + return false; +if (Ld->isIndexed()) + return false; + } + if (auto Ld = dyn_cast(Base)) +if (Ld->isIndexed()) + return false; if (LD->getChain() != Base->getChain()) return false; EVT VT = LD->getMemoryVT(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index f2ab88851b780..c29cb424c7a4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize, } /// Parses tree in Ptr for base, index, offset addresses. -static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, - const SelectionDAG &DAG) { +template +static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) { SDValue Ptr = N->getBasePtr(); // (((B + I*M) + c)) + c ... @@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N, bool IsIndexSignExt = false; // pre-inc/pre-dec ops are components of EA. - if (N->getAddressingMode() == ISD::PRE_INC) { -if (auto *C = dyn_cast(N->getOffset()))
[llvm-branch-commits] [llvm] [X86] Remove extra MOV after widening atomic load (PR #138635)
https://github.com/jofrn updated https://github.com/llvm/llvm-project/pull/138635 >From 539584cc6a26cb072ed8fa199a873256543d301a Mon Sep 17 00:00:00 2001 From: jofernau_amdeng Date: Tue, 6 May 2025 01:48:11 -0400 Subject: [PATCH] [X86] Remove extra MOV after widening atomic load This change adds patterns to optimize out an extra MOV present after widening the atomic load. commit-id:45989503 --- llvm/lib/Target/X86/X86InstrCompiler.td| 7 llvm/test/CodeGen/X86/atomic-load-store.ll | 40 -- llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index efa1e8bd7f3e3..786d0567280f9 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1204,6 +1204,13 @@ def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>; def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>; def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>; +def : Pat<(v4i32 (scalar_to_vector (i32 (zext (i16 (atomic_load_16 addr:$src)), + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8> +def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src, + (MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16> +def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, + (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> + // Floating point loads/stores. 
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst), (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>; diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll index 9ee8b4fc5ac7f..3cf9e3c1a8dfa 100644 --- a/llvm/test/CodeGen/X86/atomic-load-store.ll +++ b/llvm/test/CodeGen/X86/atomic-load-store.ll @@ -165,11 +165,15 @@ define <2 x i8> @atomic_vec2_i8(ptr %x) { } define <2 x i16> @atomic_vec2_i16(ptr %x) { -; CHECK-LABEL: atomic_vec2_i16: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec2_i16: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec2_i16: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <2 x i16>, ptr %x acquire, align 4 ret <2 x i16> %ret } @@ -177,8 +181,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) { define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { ; CHECK-LABEL: atomic_vec2_ptr270: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x ptr addrspace(270)>, ptr %x acquire, align 8 ret <2 x ptr addrspace(270)> %ret @@ -187,8 +190,7 @@ define <2 x ptr addrspace(270)> @atomic_vec2_ptr270(ptr %x) { define <2 x i32> @atomic_vec2_i32_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_i32_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x i32>, ptr %x acquire, align 8 ret <2 x i32> %ret @@ -197,8 +199,7 @@ define <2 x i32> @atomic_vec2_i32_align(ptr %x) { define <2 x float> @atomic_vec2_float_align(ptr %x) { ; CHECK-LABEL: atomic_vec2_float_align: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq 
(%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <2 x float>, ptr %x acquire, align 8 ret <2 x float> %ret @@ -354,11 +355,15 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind { } define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { -; CHECK-LABEL: atomic_vec4_i8: -; CHECK: ## %bb.0: -; CHECK-NEXT:movl (%rdi), %eax -; CHECK-NEXT:movd %eax, %xmm0 -; CHECK-NEXT:retq +; CHECK3-LABEL: atomic_vec4_i8: +; CHECK3: ## %bb.0: +; CHECK3-NEXT:movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK3-NEXT:retq +; +; CHECK0-LABEL: atomic_vec4_i8: +; CHECK0: ## %bb.0: +; CHECK0-NEXT:movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK0-NEXT:retq %ret = load atomic <4 x i8>, ptr %x acquire, align 4 ret <4 x i8> %ret } @@ -366,8 +371,7 @@ define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind { define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind { ; CHECK-LABEL: atomic_vec4_i16: ; CHECK: ## %bb.0: -; CHECK-NEXT:movq (%rdi), %rax -; CHECK-NEXT:movq %rax, %xmm0 +; CHECK-NEXT:movq (%rdi), %xmm0 ; CHECK-NEXT:retq %ret = load atomic <4 x i16>, ptr %x acquire, align 8 ret <4 x i16> %ret diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unorder