[llvm-branch-commits] [llvm] AMDGPU: Custom expand flat cmpxchg which may access private (PR #109410)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/109410

>From 831b4a6dde281d7cd3b95557c15cb417d278d568 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 14 Aug 2024 13:57:14 +0400
Subject: [PATCH 1/2] AMDGPU: Custom expand flat cmpxchg which may access
 private

64-bit flat cmpxchg instructions do not work correctly for scratch
addresses, and need to be expanded as non-atomic.

Allow custom expansion of cmpxchg in AtomicExpand, as is
already the case for atomicrmw.
---
 llvm/include/llvm/CodeGen/TargetLowering.h|5 +
 .../llvm/Transforms/Utils/LowerAtomic.h   |7 +
 llvm/lib/CodeGen/AtomicExpandPass.cpp |4 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  146 ++-
 llvm/lib/Target/AMDGPU/SIISelLowering.h   |3 +
 llvm/lib/Transforms/Utils/LowerAtomic.cpp |   21 +-
 llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll  | 1027 +++--
 ...expand-atomicrmw-flat-noalias-addrspace.ll |6 +-
 ...expand-atomicrmw-integer-ops-0-to-add-0.ll |6 +-
 .../expand-cmpxchg-flat-maybe-private.ll  |  104 +-
 10 files changed, 1161 insertions(+), 168 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h 
b/llvm/include/llvm/CodeGen/TargetLowering.h
index 3842af56e6b3d7..678b169568afcf 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2204,6 +2204,11 @@ class TargetLoweringBase {
 "Generic atomicrmw expansion unimplemented on this target");
   }
 
+  /// Perform a cmpxchg expansion using a target-specific method.
+  virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const {
+llvm_unreachable("Generic cmpxchg expansion unimplemented on this target");
+  }
+
   /// Perform a bit test atomicrmw using a target-specific intrinsic. This
   /// represents the combined bit test intrinsic which will be lowered at a 
late
   /// stage by the backend.
diff --git a/llvm/include/llvm/Transforms/Utils/LowerAtomic.h 
b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
index b25b281667f9cb..295c2bd2b4b47e 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerAtomic.h
@@ -23,6 +23,13 @@ class IRBuilderBase;
 /// Convert the given Cmpxchg into primitive load and compare.
 bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI);
 
+/// Emit IR to implement the given cmpxchg operation on values in registers,
+/// returning the new value.
+std::pair<Value *, Value *> buildAtomicCmpXchgValue(IRBuilderBase &Builder,
+Value *Ptr, Value *Cmp,
+Value *Val,
+Align Alignment);
+
 /// Convert the given RMWI into primitive load and stores,
 /// assuming that doing so is legal. Return true if the lowering
 /// succeeds.
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp 
b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index b5eca44cb611a3..71e0fd2b7167a2 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1672,6 +1672,10 @@ bool 
AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 return true;
   case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
 return lowerAtomicCmpXchgInst(CI);
+  case TargetLoweringBase::AtomicExpansionKind::Expand: {
+TLI->emitExpandAtomicCmpXchg(CI);
+return true;
+  }
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a4962399fc2815..17bd5c2343a4f1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16504,9 +16504,21 @@ 
SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const 
{
-  return CmpX->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
- ? AtomicExpansionKind::NotAtomic
- : AtomicExpansionKind::None;
+  unsigned AddrSpace = CmpX->getPointerAddressSpace();
+  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
+return AtomicExpansionKind::NotAtomic;
+
+  if (AddrSpace != AMDGPUAS::FLAT_ADDRESS || !flatInstrMayAccessPrivate(CmpX))
+return AtomicExpansionKind::None;
+
+  const DataLayout &DL = CmpX->getDataLayout();
+
+  Type *ValTy = CmpX->getNewValOperand()->getType();
+
+  // If a 64-bit flat atomic may alias private, we need to avoid using the
+  // atomic in the private case.
+  return DL.getTypeSizeInBits(ValTy) == 64 ? AtomicExpansionKind::Expand
+   : AtomicExpansionKind::None;
 }
 
 const TargetRegisterClass *
@@ -16670,40 +16682,8 @@ bool SITargetLowering::checkForPhysRegDependency(
   return false;
 }
 
-void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
-  AtomicRMWInst::BinOp Op = AI->getOperation();
-
-  if (Op == AtomicRMWInst::Sub || Op ==

[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/102599

>From a2719d4938a1eaf135c275257b1b6c0318ccc801 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 9 Aug 2024 14:51:41 +0400
Subject: [PATCH] AMDGPU: Add noalias.addrspace metadata when autoupgrading
 atomic intrinsics

This will be needed to continue generating the raw instruction in the flat case.
---
 llvm/lib/IR/AutoUpgrade.cpp| 13 -
 llvm/test/Bitcode/amdgcn-atomic.ll | 45 --
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 6f833acd6dbc0d..ca2602e56136a7 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -34,9 +34,11 @@
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
@@ -4235,13 +4237,22 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef 
Name, CallBase *CI,
   AtomicRMWInst *RMW =
   Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
 
-  if (PtrTy->getAddressSpace() != 3) {
+  unsigned AddrSpace = PtrTy->getAddressSpace();
+  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
   RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
   }
 
+  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
+MDBuilder MDB(F->getContext());
+MDNode *RangeNotPrivate =
+MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
+APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
+RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
+  }
+
   if (IsVolatile)
 RMW->setVolatile(true);
 
diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll 
b/llvm/test/Bitcode/amdgcn-atomic.ll
index d642372799f56b..87ca1e3a617ed9 100644
--- a/llvm/test/Bitcode/amdgcn-atomic.ll
+++ b/llvm/test/Bitcode/amdgcn-atomic.ll
@@ -2,10 +2,10 @@
 
 
 define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) 
%ptr3) {
-  ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, 
i32 0, i1 false)
 
-  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, 
i32 43, i32 0, i32 0, i1 false)
 
   ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 
syncscope("agent") seq_cst, align 4{{$}}
@@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, 
ptr addrspace(3) %ptr
 }
 
 define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) 
%ptr3) {
-  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, 
i32 0, i1 false)
 
-  ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, 
i32 43, i32 0, i32 0, i1 false)
 
   ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 
syncscope("agent") seq_cst, align 4{{$}}
@@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, 
ptr addrspace(3) %ptr
 
 ; Test some invalid ordering handling
 define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) 
%ptr3) {
-  ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") 
seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") 
seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 
-1, i32 0, i1 true)
 
-  ; CHECK:

[llvm-branch-commits] [llvm] AMDGPU: Add baseline tests for cmpxchg custom expansion (PR #109408)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/109408

>From caecd58b94c52b5568fc0014dad1c51796e4d36e Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Thu, 12 Sep 2024 12:44:04 +0400
Subject: [PATCH] AMDGPU: Add baseline tests for cmpxchg custom expansion

We need a non-atomic path if flat may access private.
---
 .../AMDGPU/flat_atomics_i64_noprivate.ll  |  34 +--
 .../AtomicExpand/AMDGPU/expand-atomic-mmra.ll |  12 +-
 ...and-atomic-rmw-fadd-flat-specialization.ll |   4 +-
 ...expand-atomicrmw-flat-noalias-addrspace.ll | 149 -
 .../expand-cmpxchg-flat-maybe-private.ll  | 208 ++
 5 files changed, 382 insertions(+), 25 deletions(-)
 create mode 100644 
llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll

diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll 
b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll
index c0b3adce81342d..f4fe003a34d3fb 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll
@@ -5088,7 +5088,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr 
%out, i64 %in, i64 %old
 ; GFX12-NEXT:s_endpgm
 entry:
   %gep = getelementptr i64, ptr %out, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }
 
@@ -5145,7 +5145,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr 
%out, i64 %in, i64 %ol
 ; GFX12-NEXT:s_endpgm
 entry:
   %gep = getelementptr i64, ptr %out, i64 9000
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }
 
@@ -5206,7 +5206,7 @@ define amdgpu_kernel void 
@atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6
 ; GFX12-NEXT:s_endpgm
 entry:
   %gep = getelementptr i64, ptr %out, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void
@@ -5270,7 +5270,7 @@ define amdgpu_kernel void 
@atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
   %gep = getelementptr i64, ptr %ptr, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }
 
@@ -5344,7 +5344,7 @@ define amdgpu_kernel void 
@atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
   %gep = getelementptr i64, ptr %ptr, i64 4
-  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void
@@ -5398,7 +5398,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64(ptr %out, 
i64 %in, i64 %old) {
 ; GFX12-NEXT:global_inv scope:SCOPE_DEV
 ; GFX12-NEXT:s_endpgm
 entry:
-  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }
 
@@ -5454,7 +5454,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret(ptr 
%out, ptr %out2, i64 %in,
 ; GFX12-NEXT:flat_store_b64 v[2:3], v[0:1]
 ; GFX12-NEXT:s_endpgm
 entry:
-  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %out, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   %extract0 = extractvalue { i64, i1 } %val, 0
   store i64 %extract0, ptr %out2
   ret void
@@ -5513,7 +5513,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr 
%out, i64 %in, i64 %ind
 ; GFX12-NEXT:s_endpgm
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
-  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst, !noalias.addrspace !0
   ret void
 }
 
@@ -5582,7 +5582,7 @@ define amdgpu_kernel void 
@atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6
 ; GFX12-NEXT:s_endpgm
 entry:
   %ptr = getelementptr i64, ptr %out, i64 %index
-  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") 
seq_cst seq_cst
+  %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") 
seq_cst se

[llvm-branch-commits] [mlir] b3cdd66 - Revert "[MLIR][TilingInterface] Extend consumer fusion for multi-use of produ…"

2024-09-30 Thread via llvm-branch-commits

Author: Abhishek Varma
Date: 2024-09-30T14:51:23+05:30
New Revision: b3cdd66549a17e8ab83b23117d0a1fc9feb50534

URL: 
https://github.com/llvm/llvm-project/commit/b3cdd66549a17e8ab83b23117d0a1fc9feb50534
DIFF: 
https://github.com/llvm/llvm-project/commit/b3cdd66549a17e8ab83b23117d0a1fc9feb50534.diff

LOG: Revert "[MLIR][TilingInterface] Extend consumer fusion for multi-use of 
produ…"

This reverts commit b8c974f09391d78035928c599a911009bbe49e85.

Added: 


Modified: 
mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir

Removed: 




diff  --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp 
b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
index 50cfd29e6bf907..7cfd772a72b175 100644
--- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
@@ -1481,29 +1481,21 @@ checkAssumptionForFusingConsumer(tensor::InsertSliceOp 
candidateSliceOp) {
 /// failure otherwise.
static FailureOr<OpOperand *> getConsumerFromUses(Value val,
   Block *containingOpBlock) {
-  // Check that the value has exactly one use which isn't a scf.yield or a
-  // tensor.parallel_insert_slice op.
-  OpOperand *operand = nullptr;
-  for (OpOperand &opOperand : val.getUses()) {
-Operation *consumerOp = opOperand.getOwner();
-if (isa<scf::YieldOp, tensor::ParallelInsertSliceOp>(consumerOp))
-  continue;
-if (operand)
-  return failure();
-// TODO: We have to init result of consumer before scf.for, use
-//   DestinationStyleOpInterface to get result shape from init for now.
-//   Add support for other op such as op has InferTypeOpInterface.
-if (!isa<TilingInterface>(consumerOp) ||
-!isa<DestinationStyleOpInterface>(consumerOp))
-  return failure();
-if (containingOpBlock != consumerOp->getBlock())
-  return failure();
-operand = &opOperand;
-  }
-
-  if (operand)
-return operand;
-  return failure();
+  // Step 1. Check that the value has exactly one use.
+  if (!llvm::hasSingleElement(val.getUses()))
+return failure();
+  // Step 2. Get uses.
+  OpOperand &operand = (*val.getUses().begin());
+  Operation *consumerOp = operand.getOwner();
+  // TODO: We have to init result of consumer before scf.for, use
+  //   DestinationStyleOpInterface to get result shape from init for now.
+  //   Add support for other op such as op has InferTypeOpInterface.
+  if (!isa<TilingInterface>(consumerOp) ||
+  !isa<DestinationStyleOpInterface>(consumerOp))
+return failure();
+  if (containingOpBlock != consumerOp->getBlock())
+return failure();
+  return &operand;
 }
 
 /// Find the perfectly nested loops outside of given loop(included) sorted from

diff  --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir 
b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
index f5f703d95e2d5b..fdefdcc453ae7a 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-consumer.mlir
@@ -437,74 +437,3 @@ module attributes {transform.with_named_sequence} {
 //  CHECK: scf.yield %[[LOOP_RESULT2]]#0, %[[LOOP_RESULT2]]#1 :
 //  CHECK:   }
 //  CHECK:   return %[[LOOP_RESULT1]]#1 :
-
-// -
-
-// This test case checks fusion of consumer even if the producer has multiple 
uses.
-// The multiple uses of the producer essentially means that besides the 
consumer
-// op in concern, the only other uses of the producer are allowed in :-
-// 1. scf.yield
-// 2. tensor.parallel_insert_slice
-
-module {
-  module {
-func.func @fuse_consumer_for_multi_use_producer(%arg0: 
tensor<256x512xf32>, %arg1: tensor<512x256xf32>, %arg2: tensor<256x256xf32>) -> 
(tensor<256x256xf32>, tensor<256x256xf32>) {
-  %c0 = arith.constant 0 : index
-  %c64 = arith.constant 64 : index
-  %c256 = arith.constant 256 : index
-  %cst = arith.constant 0.00e+00 : f32
-  %0 = tensor.empty() : tensor<256x256xf32>
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> 
tensor<256x256xf32>
-  %2:2 = scf.for %arg3 = %c0 to %c256 step %c64 iter_args(%arg4 = %1, 
%arg5 = %arg2) -> (tensor<256x256xf32>, tensor<256x256xf32>) {
-%3 = scf.for %arg6 = %c0 to %c256 step %c64 iter_args(%arg7 = %arg4) 
-> (tensor<256x256xf32>) {
-  %extracted_slice = tensor.extract_slice %arg7[%arg3, %arg6] [64, 64] 
[1, 1] : tensor<256x256xf32> to tensor<64x64xf32>
-  %extracted_slice_0 = tensor.extract_slice %arg0[%arg3, 0] [64, 512] 
[1, 1] : tensor<256x512xf32> to tensor<64x512xf32>
-  %extracted_slice_1 = tensor.extract_slice %arg1[0, %arg6] [512, 64] 
[1, 1] : tensor<512x256xf32> to tensor<512x64xf32>
-  %5 = linalg.matmul ins(%extracted_slice_0, %extracted_slice_1 : 
tensor<64x512xf32>, tensor<512x64xf32>) outs(%extracted_slice : 
tensor<64x64xf32>) -> tensor<64x64xf32>
-  %ins

[llvm-branch-commits] [llvm] release/19.x: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256) (PR #110470)

2024-09-30 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/110470
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256) (PR #110470)

2024-09-30 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/110470

Backport e9d12a6b451bd403d95105aa976a011dc821f126 
83fe85115da9dc25fa270d2ea8140113c8d49670

Requested by: @arsenm

>From 4477e7b862c603da7586598248e4ea0c60c81407 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 30 Sep 2024 10:39:17 +0200
Subject: [PATCH 1/2] AMDGPU: Add test for 16 bit unsigned scratch offsets
 (#110255)

Large scratch offset with one on highest bit selected as negative,
negative offset has same binary representation in 16 bits as large
unsigned offset.

(cherry picked from commit e9d12a6b451bd403d95105aa976a011dc821f126)
---
 .../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll | 239 ++
 llvm/test/CodeGen/AMDGPU/flat-scratch.ll  | 444 ++
 2 files changed, 683 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index a5e4151bf36958..47ca6f416b02b0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1513,4 +1513,243 @@ bb:
   ret void
 }
 
+define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr 
addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset:
+; GFX9:   ; %bb.0: ; %entry
+; GFX9-NEXT:s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:s_add_u32 s0, s2, 0xffe8
+; GFX9-NEXT:scratch_load_dword v2, off, s0
+; GFX9-NEXT:s_waitcnt vmcnt(0)
+; GFX9-NEXT:global_store_dword v[0:1], v2, off
+; GFX9-NEXT:s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset:
+; GFX10:   ; %bb.0: ; %entry
+; GFX10-NEXT:s_add_u32 s0, s0, s5
+; GFX10-NEXT:s_addc_u32 s1, s1, 0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:s_add_u32 s0, s2, 0xffe8
+; GFX10-NEXT:scratch_load_dword v2, off, s0
+; GFX10-NEXT:s_waitcnt vmcnt(0)
+; GFX10-NEXT:global_store_dword v[0:1], v2, off
+; GFX10-NEXT:s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset:
+; GFX940:   ; %bb.0: ; %entry
+; GFX940-NEXT:s_add_u32 s0, s0, 0xffe8
+; GFX940-NEXT:scratch_load_dword v2, off, s0
+; GFX940-NEXT:s_waitcnt vmcnt(0)
+; GFX940-NEXT:global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_add_u32 s0, s0, 0xffe8
+; GFX11-NEXT:scratch_load_b32 v2, off, s0
+; GFX11-NEXT:s_waitcnt vmcnt(0)
+; GFX11-NEXT:global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:s_nop 0
+; GFX11-NEXT:s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:s_endpgm
+;
+; GFX12-LABEL: sgpr_base_large_offset:
+; GFX12:   ; %bb.0: ; %entry
+; GFX12-NEXT:scratch_load_b32 v2, off, s0 offset:65512
+; GFX12-NEXT:s_wait_loadcnt 0x0
+; GFX12-NEXT:global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:s_nop 0
+; GFX12-NEXT:s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT:s_endpgm
+entry:
+  %large_offset = getelementptr i8, ptr addrspace(5) %sgpr_base, i32 65512
+  %load = load i32, ptr addrspace(5) %large_offset, align 4
+  store i32 %load, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr 
addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset_split:
+; GFX9:   ; %bb.0: ; %entry
+; GFX9-NEXT:s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:s_and_b32 s0, s2, -4
+; GFX9-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX9-NEXT:scratch_load_dword v2, off, s0 glc
+; GFX9-NEXT:s_waitcnt vmcnt(0)
+; GFX9-NEXT:global_store_dword v[0:1], v2, off
+; GFX9-NEXT:s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset_split:
+; GFX10:   ; %bb.0: ; %entry
+; GFX10-NEXT:s_add_u32 s0, s0, s5
+; GFX10-NEXT:s_addc_u32 s1, s1, 0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:s_and_b32 s0, s2, -4
+; GFX10-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX10-NEXT:scratch_load_dword v2, off, s0 glc dlc
+; GFX10-NEXT:s_waitcnt vmcnt(0)
+; GFX10-NEXT:global_store_dword v[0:1], v2, off
+; GFX10-NEXT:s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset_split:
+; GFX940:   ; %bb.0: ; %entry
+; GFX940-NEXT:s_and_b32 s0, s0, -4
+; GFX940-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX940-NEXT:scratch_load_dword v2, off, s0 sc0 sc1
+; GFX940-NEXT:s_waitcnt vmcnt(0)
+; GFX940-NEXT:global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset_split:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_and_b32 s0, s0, -4
+; GFX11-NEXT:s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX11-NEXT:

[llvm-branch-commits] [llvm] release/19.x: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256) (PR #110470)

2024-09-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-globalisel

Author: None (llvmbot)


Changes

Backport e9d12a6b451bd403d95105aa976a011dc821f126 
83fe85115da9dc25fa270d2ea8140113c8d49670

Requested by: @arsenm

---

Patch is 29.90 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/110470.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll (+239) 
- (modified) llvm/test/CodeGen/AMDGPU/flat-scratch.ll (+444) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b7471bab128509..7b786ee2641721 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode 
*Parent, SDValue Addr,
 0);
   }
 
-  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
+  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
 
   return true;
 }
@@ -1967,7 +1967,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, 
SDValue Addr,
   return false;
 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
   return false;
-Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
 return true;
   }
 }
@@ -2000,7 +2000,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, 
SDValue Addr,
   if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
 return false;
   SAddr = SelectSAddrFI(CurDAG, SAddr);
-  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index a5e4151bf36958..47ca6f416b02b0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1513,4 +1513,243 @@ bb:
   ret void
 }
 
+define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr 
addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset:
+; GFX9:   ; %bb.0: ; %entry
+; GFX9-NEXT:s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:s_add_u32 s0, s2, 0xffe8
+; GFX9-NEXT:scratch_load_dword v2, off, s0
+; GFX9-NEXT:s_waitcnt vmcnt(0)
+; GFX9-NEXT:global_store_dword v[0:1], v2, off
+; GFX9-NEXT:s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset:
+; GFX10:   ; %bb.0: ; %entry
+; GFX10-NEXT:s_add_u32 s0, s0, s5
+; GFX10-NEXT:s_addc_u32 s1, s1, 0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:s_add_u32 s0, s2, 0xffe8
+; GFX10-NEXT:scratch_load_dword v2, off, s0
+; GFX10-NEXT:s_waitcnt vmcnt(0)
+; GFX10-NEXT:global_store_dword v[0:1], v2, off
+; GFX10-NEXT:s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset:
+; GFX940:   ; %bb.0: ; %entry
+; GFX940-NEXT:s_add_u32 s0, s0, 0xffe8
+; GFX940-NEXT:scratch_load_dword v2, off, s0
+; GFX940-NEXT:s_waitcnt vmcnt(0)
+; GFX940-NEXT:global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_add_u32 s0, s0, 0xffe8
+; GFX11-NEXT:scratch_load_b32 v2, off, s0
+; GFX11-NEXT:s_waitcnt vmcnt(0)
+; GFX11-NEXT:global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:s_nop 0
+; GFX11-NEXT:s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:s_endpgm
+;
+; GFX12-LABEL: sgpr_base_large_offset:
+; GFX12:   ; %bb.0: ; %entry
+; GFX12-NEXT:scratch_load_b32 v2, off, s0 offset:65512
+; GFX12-NEXT:s_wait_loadcnt 0x0
+; GFX12-NEXT:global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:s_nop 0
+; GFX12-NEXT:s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT:s_endpgm
+entry:
+  %large_offset = getelementptr i8, ptr addrspace(5) %sgpr_base, i32 65512
+  %load = load i32, ptr addrspace(5) %large_offset, align 4
+  store i32 %load, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr 
addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset_split:
+; GFX9:   ; %bb.0: ; %entry
+; GFX9-NEXT:s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:s_and_b32 s0, s2, -4
+; GFX9-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX9-NEXT:scratch_load_dword v2, off, s0 glc
+; GFX9-NEXT:s_waitcnt vmcnt(0)
+; GFX9-NEXT:global_store_dword v[0:1], v2, off
+; GFX9-NEXT:s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset_split:
+; GFX10:  

[llvm-branch-commits] [llvm] release/19.x: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256) (PR #110470)

2024-09-30 Thread via llvm-branch-commits

llvmbot wrote:

@jayfoad @arsenm What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/110470
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256) (PR #110470)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/110470
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Serialize WWM_REG vreg flag (PR #110229)

2024-09-30 Thread Akshat Oke via llvm-branch-commits


@@ -684,8 +684,8 @@ class SIMachineFunctionInfo final : public 
AMDGPUMachineFunction,
 
   void setFlag(Register Reg, uint8_t Flag) {
 assert(Reg.isVirtual());
-if (VRegFlags.inBounds(Reg))
-  VRegFlags[Reg] |= Flag;
+VRegFlags.grow(Reg);

Akshat-Oke wrote:

The MIR function is parsed after parsing the options, so the 
`noteNewVirtualRegister` callback doesn't take effect.

https://github.com/llvm/llvm-project/pull/110229
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256) (PR #110470)

2024-09-30 Thread Jay Foad via llvm-branch-commits

https://github.com/jayfoad approved this pull request.


https://github.com/llvm/llvm-project/pull/110470
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (PR #110256)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/110256
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] 00e4f81 - Revert "[flang] Implement GETUID and GETGID intrinsics (#108017)"

2024-09-30 Thread via llvm-branch-commits

Author: David Truby
Date: 2024-09-30T17:26:07+01:00
New Revision: 00e4f81a294e8e2d78d572c76dd017a8af050cf0

URL: 
https://github.com/llvm/llvm-project/commit/00e4f81a294e8e2d78d572c76dd017a8af050cf0
DIFF: 
https://github.com/llvm/llvm-project/commit/00e4f81a294e8e2d78d572c76dd017a8af050cf0.diff

LOG: Revert "[flang] Implement GETUID and GETGID intrinsics (#108017)"

This reverts commit 054eadcb117ba7c86a99dff5c9d0ed101c7f17ea.

Added: 


Modified: 
flang/docs/Intrinsics.md
flang/include/flang/Evaluate/target.h
flang/include/flang/Optimizer/Builder/IntrinsicCall.h
flang/include/flang/Optimizer/Builder/Runtime/Intrinsics.h
flang/include/flang/Runtime/extensions.h
flang/include/flang/Tools/TargetSetup.h
flang/lib/Evaluate/intrinsics.cpp
flang/lib/Optimizer/Builder/IntrinsicCall.cpp
flang/lib/Optimizer/Builder/Runtime/Intrinsics.cpp
flang/lib/Semantics/check-call.cpp
flang/lib/Semantics/check-call.h
flang/lib/Semantics/expression.cpp
flang/runtime/extensions.cpp
flang/unittests/Optimizer/Builder/Runtime/CommandTest.cpp
flang/unittests/Optimizer/CMakeLists.txt

Removed: 
flang/test/Semantics/windows.f90
flang/unittests/Optimizer/Builder/Runtime/IntrinsicsTest.cpp



diff  --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md
index e288fdeec6cd22..87716731ead855 100644
--- a/flang/docs/Intrinsics.md
+++ b/flang/docs/Intrinsics.md
@@ -765,7 +765,7 @@ This phase currently supports all the intrinsic procedures 
listed above but the
 | Coarray intrinsic functions | COSHAPE |
 | Object characteristic inquiry functions | ALLOCATED, ASSOCIATED, 
EXTENDS_TYPE_OF, IS_CONTIGUOUS, PRESENT, RANK, SAME_TYPE, STORAGE_SIZE |
 | Type inquiry intrinsic functions | BIT_SIZE, DIGITS, EPSILON, HUGE, KIND, 
MAXEXPONENT, MINEXPONENT, NEW_LINE, PRECISION, RADIX, RANGE, TINY|
-| Non-standard intrinsic functions | AND, OR, XOR, SHIFT, ZEXT, IZEXT, COSD, 
SIND, TAND, ACOSD, ASIND, ATAND, ATAN2D, COMPL, EQV, NEQV, INT8, JINT, JNINT, 
KNINT, QCMPLX, DREAL, DFLOAT, QEXT, QFLOAT, QREAL, DNUM, NUM, JNUM, KNUM, QNUM, 
RNUM, RAN, RANF, ILEN, SIZEOF, MCLOCK, SECNDS, COTAN, IBCHNG, ISHA, ISHC, ISHL, 
IXOR, IARG, IARGC, NARGS, GETPID, NUMARG, BADDRESS, IADDR, CACHESIZE, EOF, 
FP_CLASS, INT_PTR_KIND, ISNAN, MALLOC, GETUID, GETGID |
+| Non-standard intrinsic functions | AND, OR, XOR, SHIFT, ZEXT, IZEXT, COSD, 
SIND, TAND, ACOSD, ASIND, ATAND, ATAN2D, COMPL, EQV, NEQV, INT8, JINT, JNINT, 
KNINT, QCMPLX, DREAL, DFLOAT, QEXT, QFLOAT, QREAL, DNUM, NUM, JNUM, KNUM, QNUM, 
RNUM, RAN, RANF, ILEN, SIZEOF, MCLOCK, SECNDS, COTAN, IBCHNG, ISHA, ISHC, ISHL, 
IXOR, IARG, IARGC, NARGS, GETPID, NUMARG, BADDRESS, IADDR, CACHESIZE, EOF, 
FP_CLASS, INT_PTR_KIND, ISNAN, MALLOC |
 | Intrinsic subroutines |MVBITS (elemental), CPU_TIME, DATE_AND_TIME, 
EVENT_QUERY, EXECUTE_COMMAND_LINE, GET_COMMAND, GET_COMMAND_ARGUMENT, 
GET_ENVIRONMENT_VARIABLE, MOVE_ALLOC, RANDOM_INIT, RANDOM_NUMBER, RANDOM_SEED, 
SIGNAL, SLEEP, SYSTEM, SYSTEM_CLOCK |
 | Atomic intrinsic subroutines | ATOMIC_ADD |
 | Collective intrinsic subroutines | CO_REDUCE |

diff  --git a/flang/include/flang/Evaluate/target.h 
b/flang/include/flang/Evaluate/target.h
index b347c549e012da..d076fcbf083078 100644
--- a/flang/include/flang/Evaluate/target.h
+++ b/flang/include/flang/Evaluate/target.h
@@ -102,11 +102,6 @@ class TargetCharacteristics {
   bool isPPC() const { return isPPC_; }
   void set_isPPC(bool isPPC = false);
 
-  bool isOSWindows() const { return isOSWindows_; }
-  void set_isOSWindows(bool isOSWindows = false) {
-isOSWindows_ = isOSWindows;
-  };
-
   IeeeFeatures &ieeeFeatures() { return ieeeFeatures_; }
   const IeeeFeatures &ieeeFeatures() const { return ieeeFeatures_; }
 
@@ -116,7 +111,6 @@ class TargetCharacteristics {
   std::uint8_t align_[common::TypeCategory_enumSize][maxKind]{};
   bool isBigEndian_{false};
   bool isPPC_{false};
-  bool isOSWindows_{false};
   bool areSubnormalsFlushedToZero_{false};
   Rounding roundingMode_{defaultRounding};
   std::size_t procedurePointerByteSize_{8};

diff  --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h 
b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index b2da6138fc9d8e..78bb82b17d4050 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -256,10 +256,6 @@ struct IntrinsicLibrary {
 llvm::ArrayRef args);
   void genGetCommandArgument(mlir::ArrayRef args);
   void genGetEnvironmentVariable(llvm::ArrayRef);
-  mlir::Value genGetGID(mlir::Type resultType,
-llvm::ArrayRef args);
-  mlir::Value genGetUID(mlir::Type resultType,
-llvm::ArrayRef args);
   fir::ExtendedValue genIall(mlir::Type, llvm::ArrayRef);
   mlir::Value genIand(mlir::Type, llvm::ArrayRef);
   fir::ExtendedValue genIany(mlir::Type, llvm::ArrayRef);

diff

[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Use map format to represent use_device_{addr, ptr} (PR #109810)

2024-09-30 Thread via llvm-branch-commits

https://github.com/agozillon approved this pull request.

LGTM, @TIFitis would be a good secondary reviewer if he wishes to do so!

https://github.com/llvm/llvm-project/pull/109810
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Use map format to represent use_device_{addr, ptr} (PR #109810)

2024-09-30 Thread via llvm-branch-commits

https://github.com/agozillon edited 
https://github.com/llvm/llvm-project/pull/109810
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Improve omp.section block arguments handling (PR #110266)

2024-09-30 Thread Michael Klemm via llvm-branch-commits

https://github.com/mjklemm approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/110266
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Use map format to represent use_device_{addr, ptr} (PR #109810)

2024-09-30 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak updated 
https://github.com/llvm/llvm-project/pull/109810

>From f61e3a60d6f494d08b58ded9b802f2b3d92b728f Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Fri, 20 Sep 2024 17:11:34 +0100
Subject: [PATCH] [MLIR][OpenMP] Use map format to represent
 use_device_{addr,ptr}

This patch updates the `omp.target_data` operation to use the same formatting
as `map` clauses on `omp.target` for `use_device_addr` and `use_device_ptr`.
This is done so the mapping that is being enforced between op arguments and
associated entry block arguments is explicit.

The way it is achieved is by marking these clauses as entry block
argument-defining and adjusting printer/parsers accordingly.

As a result of this change, block arguments for `use_device_addr` come before
those for `use_device_ptr`, which is the opposite of the previous undocumented
situation. Some unit tests are updated based on this change, in addition to
those updated because of the format change.
---
 .../Fir/convert-to-llvm-openmp-and-fir.fir|  5 +-
 flang/test/Lower/OpenMP/target.f90|  6 +-
 .../use-device-ptr-to-use-device-addr.f90 | 12 +--
 .../mlir/Dialect/OpenMP/OpenMPClauses.td  | 28 ++-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  6 ++
 .../Dialect/OpenMP/OpenMPOpsInterfaces.td | 37 -
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 43 +++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp  | 77 ---
 mlir/test/Dialect/OpenMP/ops.mlir |  6 +-
 mlir/test/Target/LLVMIR/omptarget-llvm.mlir   | 19 ++---
 .../openmp-target-use-device-nested.mlir  |  3 +-
 11 files changed, 179 insertions(+), 63 deletions(-)

diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir 
b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
index 4d226eaa754c12..61f18008633d50 100644
--- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
@@ -429,13 +429,14 @@ func.func @_QPopenmp_target_data_region() {
 
 func.func @_QPomp_target_data_empty() {
   %0 = fir.alloca !fir.array<1024xi32> {bindc_name = "a", uniq_name = 
"_QFomp_target_data_emptyEa"}
-  omp.target_data use_device_addr(%0 : !fir.ref>) {
+  omp.target_data use_device_addr(%0 -> %arg0 : 
!fir.ref>) {
+omp.terminator
   }
   return
 }
 
 // CHECK-LABEL:   llvm.func @_QPomp_target_data_empty
-// CHECK: omp.target_data   use_device_addr(%1 : !llvm.ptr) {
+// CHECK: omp.target_data   use_device_addr(%1 -> %{{.*}} : !llvm.ptr) {
 // CHECK: }
 
 // -
diff --git a/flang/test/Lower/OpenMP/target.f90 
b/flang/test/Lower/OpenMP/target.f90
index dedce581436490..ab33b6b3808315 100644
--- a/flang/test/Lower/OpenMP/target.f90
+++ b/flang/test/Lower/OpenMP/target.f90
@@ -506,9 +506,8 @@ subroutine omp_target_device_ptr
type(c_ptr) :: a
integer, target :: b
!CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}})   map_clauses(tofrom) 
capture(ByRef) -> {{.*}} {name = "a"}
-   !CHECK: omp.target_data map_entries(%[[MAP]]{{.*}}) use_device_ptr({{.*}})
+   !CHECK: omp.target_data map_entries(%[[MAP]]{{.*}}) use_device_ptr({{.*}} 
-> %[[VAL_1:.*]] : 
!fir.ref>)
!$omp target data map(tofrom: a) use_device_ptr(a)
-   !CHECK: ^bb0(%[[VAL_1:.*]]: 
!fir.ref>):
!CHECK: {{.*}} = fir.coordinate_of %[[VAL_1:.*]], {{.*}} : 
(!fir.ref>, 
!fir.field) -> !fir.ref
   a = c_loc(b)
!CHECK: omp.terminator
@@ -529,9 +528,8 @@ subroutine omp_target_device_addr
!CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}} : 
!fir.ref>>, !fir.box>) map_clauses(tofrom) 
capture(ByRef) members(%[[MAP_MEMBERS]] : [0] : !fir.llvm_ptr>) 
-> !fir.ref>> {name = "a"}
!CHECK: %[[DEV_ADDR_MEMBERS:.*]] = omp.map.info var_ptr({{.*}} : 
!fir.ref>>, i32) var_ptr_ptr({{.*}} : 
!fir.llvm_ptr>) map_clauses(tofrom) capture(ByRef) -> 
!fir.llvm_ptr> {name = ""}
!CHECK: %[[DEV_ADDR:.*]] = omp.map.info var_ptr({{.*}} : 
!fir.ref>>, !fir.box>) map_clauses(tofrom) 
capture(ByRef) members(%[[DEV_ADDR_MEMBERS]] : [0] : 
!fir.llvm_ptr>) -> !fir.ref>> {name = "a"}
-   !CHECK: omp.target_data map_entries(%[[MAP_MEMBERS]], %[[MAP]] : {{.*}}) 
use_device_addr(%[[DEV_ADDR_MEMBERS]], %[[DEV_ADDR]] : {{.*}}) {
+   !CHECK: omp.target_data map_entries(%[[MAP_MEMBERS]], %[[MAP]] : {{.*}}) 
use_device_addr(%[[DEV_ADDR_MEMBERS]] -> %[[ARG_0:.*]], %[[DEV_ADDR]] -> 
%[[ARG_1:.*]] : !fir.llvm_ptr>, 
!fir.ref>>) {
!$omp target data map(tofrom: a) use_device_addr(a)
-   !CHECK: ^bb0(%[[ARG_0:.*]]: !fir.llvm_ptr>, %[[ARG_1:.*]]: 
!fir.ref>>):
!CHECK: %[[VAL_1_DECL:.*]]:2 = hlfir.declare %[[ARG_1]] {fortran_attrs = 
#fir.var_attrs, uniq_name = "_QFomp_target_device_addrEa"} : 
(!fir.ref>>) -> (!fir.ref>>, 
!fir.ref>>)
!CHECK: %[[C10:.*]] = arith.constant 10 : i32
!CHECK: %[[A_BOX:.*]] = fir.load %[[VAL_1_DECL]]#0 : 
!fir.ref>>
diff --git a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 
b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90
index 085f5419fa7f88..cb26246a6

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Improve omp.section block arguments handling (PR #110266)

2024-09-30 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak updated 
https://github.com/llvm/llvm-project/pull/110266

>From d6920f4bd10cdf88d6d640f8e1da2c595c39bdb6 Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Thu, 26 Sep 2024 11:42:03 +0100
Subject: [PATCH] [MLIR][OpenMP] Improve omp.section block arguments handling

The `omp.section` operation is an outlier in that the block arguments it has
are defined by clauses on the required parent `omp.sections` operation.

This patch updates the definition of this operation introducing the
`BlockArgOpenMPOpInterface` to simplify the handling and verification of these
block arguments, implemented based on the parent `omp.sections`.
---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 12 +++--
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 12 +
 mlir/test/Dialect/OpenMP/invalid.mlir | 25 +++
 mlir/test/Dialect/OpenMP/ops.mlir |  6 +
 4 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index d2a2b44c042fb7..66f63fc02fe2f3 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -207,8 +207,9 @@ def TeamsOp : OpenMP_Op<"teams", traits = [
 // 2.8.1 Sections Construct
 
//===--===//
 
-def SectionOp : OpenMP_Op<"section", [HasParent<"SectionsOp">],
-  singleRegion = true> {
+def SectionOp : OpenMP_Op<"section", traits = [
+BlockArgOpenMPOpInterface, HasParent<"SectionsOp">
+  ], singleRegion = true> {
   let summary = "section directive";
   let description = [{
 A section operation encloses a region which represents one section in a
@@ -218,6 +219,13 @@ def SectionOp : OpenMP_Op<"section", 
[HasParent<"SectionsOp">],
 operation. This is done to reflect situations where these block arguments
 represent variables private to each section.
   }];
+  let extraClassDeclaration = [{
+// Override BlockArgOpenMPOpInterface methods based on the parent
+// omp.sections operation. Only forward-declare here because SectionsOp is
+// not completely defined at this point.
+unsigned numPrivateBlockArgs();
+unsigned numReductionBlockArgs();
+  }] # clausesExtraClassDeclaration;
   let assemblyFormat = "$region attr-dict";
 }
 
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 220eb848ab4de2..928a07580b2637 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1844,6 +1844,18 @@ LogicalResult TeamsOp::verify() {
 getReductionByref());
 }
 
+//===--===//
+// SectionOp
+//===--===//
+
+unsigned SectionOp::numPrivateBlockArgs() {
+  return getParentOp().numPrivateBlockArgs();
+}
+
+unsigned SectionOp::numReductionBlockArgs() {
+  return getParentOp().numReductionBlockArgs();
+}
+
 
//===--===//
 // SectionsOp
 
//===--===//
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir 
b/mlir/test/Dialect/OpenMP/invalid.mlir
index 2e4df7422e4a49..a228b6430560ea 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -1572,6 +1572,31 @@ func.func @omp_sections() {
 
 // -
 
+omp.declare_reduction @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = arith.constant 0.0 : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = arith.addf %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+
+func.func @omp_sections(%x : !llvm.ptr) {
+  omp.sections reduction(@add_f32 %x -> %arg0 : !llvm.ptr) {
+// expected-error @below {{op expected at least 1 entry block argument(s)}}
+omp.section {
+  omp.terminator
+}
+omp.terminator
+  }
+  return
+}
+
+// -
+
 func.func @omp_single(%data_var : memref) -> () {
   // expected-error @below {{expected equal sizes for allocate and allocator 
variables}}
   "omp.single" (%data_var) ({
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir 
b/mlir/test/Dialect/OpenMP/ops.mlir
index ce3351ba1149f3..a4423782a723bf 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -1127,11 +1127,13 @@ func.func @sections_reduction() {
   omp.sections reduction(@add_f32 %0 -> %arg0 : !llvm.ptr) {
 // CHECK: omp.section
 omp.section {
+^bb0(%arg1 : !llvm.ptr):
   %1 = arith.constant 2.0 : f32
   omp.terminator
 }
 // CHECK: omp.section
 omp.section {
+^bb0(%arg1 : !llvm.ptr):
   %1 = arith.constant 3.0 : f32
   omp.terminator
 }
@@ -1148,11 +1150,13 @@ func.func @sections_reduction_byref() {
   omp

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Document entry block argument-defining clauses (NFC) (PR #109811)

2024-09-30 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak updated 
https://github.com/llvm/llvm-project/pull/109811

>From a821f44e2c9ac732c752abae62385c4d78082a2b Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Tue, 24 Sep 2024 15:40:17 +0100
Subject: [PATCH] [MLIR][OpenMP] Document entry block argument-defining clauses
 (NFC)

This patch adds general information on the proposed approach to unify the
handling and representation of clauses that define entry block arguments
attached to operations that accept them.
---
 mlir/docs/Dialects/OpenMPDialect/_index.md | 70 +-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/mlir/docs/Dialects/OpenMPDialect/_index.md 
b/mlir/docs/Dialects/OpenMPDialect/_index.md
index 88437b8cf828cc..3c30b29d09356b 100644
--- a/mlir/docs/Dialects/OpenMPDialect/_index.md
+++ b/mlir/docs/Dialects/OpenMPDialect/_index.md
@@ -285,7 +285,75 @@ argument's type:
   specific `mlir::Attribute` subclass) will be used instead.
   - Other attribute types will be represented with their `storageType`.
 - It will create `Operands` structure for each operation, which is an
-empty structure subclassing all operand structures defined for the 
corresponding `OpenMP_Op`'s clauses.
+empty structure subclassing all operand structures defined for the 
corresponding
+`OpenMP_Op`'s clauses.
+
+### Entry Block Argument-Defining Clauses
+
+Certain OpenMP clauses introduce in their MLIR representation mappings between
+outside values and entry block arguments for the region of the MLIR operation
+they are applied to. This enables, for example, the introduction of private
+copies of the same underlying variable. Currently, clauses with this property
+can be classified in three main categories:
+  - Map-like clauses: `map`, `use_device_addr` and `use_device_ptr`.
+  - Reduction-like clauses: `in_reduction`, `reduction` and `task_reduction`.
+  - Privatization clause: `private`.
+
+All three kinds of entry block argument-defining clauses use a similar custom
+assembly format representation, only differing based on the different pieces of
+information attached to each kind. Below, one example of each is shown:
+
+```mlir
+omp.target map_entries(%x -> %x.m, %y -> %y.m : !llvm.ptr, !llvm.ptr) {
+  // Use %x.m, %y.m in place of %x and %y...
+}
+
+omp.wsloop reduction(@add.i32 %x -> %x.r, byref @add.f32 %y -> %y.r : 
!llvm.ptr, !llvm.ptr) {
+  // Use %x.r, %y.r in place of %x and %y...
+}
+
+omp.parallel private(@x.privatizer %x -> %x.p, @y.privatizer %y -> %y.p : 
!llvm.ptr, !llvm.ptr) {
+  // Use %x.p, %y.p in place of %x and %y...
+}
+```
+
+As a consequence of parsing and printing the operation's first region entry
+block argument names together with the custom assembly format of these clauses,
+entry block arguments (i.e. the `^bb0(...):` line) must not be explicitly
+defined for these operations. Additionally, it is not possible to implement 
this
+feature while allowing each clause to be independently parsed and printed,
+because they need to be printed/parsed together with the corresponding
+operation's first region. They must have a well-defined ordering in which
+multiple of these clauses are specified for a given operation, as well.
+
+The parsing/printing of these clauses together with the region provides the
+ability to define entry block arguments directly after the `->`. Forcing a
+specific ordering between these clauses makes the block argument ordering
+well-defined, which is the property used to easily match each clause with the
+entry block arguments defined by it.
+
+Custom printers and parsers for operation regions based on the entry block
+argument-defining clauses they take are implemented based on the
+`{parse,print}BlockArgRegion` functions, which take care of the sorting and
+formatting of each kind of clause, minimizing code duplication resulting from
+this approach. One example of the custom assembly format of an operation taking
+the `private` and `reduction` clauses is the following:
+
+```tablegen
+let assemblyFormat = clausesAssemblyFormat # [{
+  custom($region, $private_vars, type($private_vars),
+  $private_syms, $reduction_vars, type($reduction_vars), $reduction_byref,
+  $reduction_syms) attr-dict
+}];
+```
+
+The `BlockArgOpenMPOpInterface` has been introduced to simplify the addition 
+and
+handling of these kinds of clauses. It holds `num<ClauseName>BlockArgs()`
+functions that by default return 0, to be overridden by each clause through the
+`extraClassDeclaration` property. Based on these functions and the expected
+alphabetical sorting between entry block argument-defining clauses, it
+implements `get<ClauseName>BlockArgs()` functions that are the intended method
+of accessing clause-defined block arguments.
 
 ## Loop-Associated Directives
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: FastISel: Fix incorrectly using getPointerTy (#110465) (PR #110490)

2024-09-30 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/110490

Backport 81ba95cefe1b5a12f0a7d8e6a383bcce9e95b785

Requested by: @arsenm

>From 2c0b211043d4516fa33c1a87c0e239f4de58b4fc Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 30 Sep 2024 13:43:53 +0400
Subject: [PATCH] FastISel: Fix incorrectly using getPointerTy (#110465)

This was using the default address space instead of the
correct one.

Fixes #56055

(cherry picked from commit 81ba95cefe1b5a12f0a7d8e6a383bcce9e95b785)
---
 llvm/include/llvm/CodeGen/FastISel.h   |  2 +-
 llvm/lib/CodeGen/SelectionDAG/FastISel.cpp |  8 +--
 llvm/lib/Target/X86/X86FastISel.cpp|  4 +-
 llvm/test/CodeGen/X86/issue56055.ll| 81 ++
 4 files changed, 89 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/issue56055.ll

diff --git a/llvm/include/llvm/CodeGen/FastISel.h 
b/llvm/include/llvm/CodeGen/FastISel.h
index 3cbc35400181dd..f3c4cc8d0511d4 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -275,7 +275,7 @@ class FastISel {
 
   /// This is a wrapper around getRegForValue that also takes care of
   /// truncating or sign-extending the given getelementptr index value.
-  Register getRegForGEPIndex(const Value *Idx);
+  Register getRegForGEPIndex(MVT PtrVT, const Value *Idx);
 
   /// We're checking to see if we can fold \p LI into \p FoldInst. Note
   /// that we could have a sequence where multiple LLVM IR instructions are
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp 
b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index ef9f7833551905..246acc7f405837 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -380,14 +380,13 @@ void FastISel::updateValueMap(const Value *I, Register 
Reg, unsigned NumRegs) {
   }
 }
 
-Register FastISel::getRegForGEPIndex(const Value *Idx) {
+Register FastISel::getRegForGEPIndex(MVT PtrVT, const Value *Idx) {
   Register IdxN = getRegForValue(Idx);
   if (!IdxN)
 // Unhandled operand. Halt "fast" selection and bail.
 return Register();
 
   // If the index is smaller or larger than intptr_t, truncate or extend it.
-  MVT PtrVT = TLI.getPointerTy(DL);
   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
   if (IdxVT.bitsLT(PtrVT)) {
 IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
@@ -543,7 +542,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
   uint64_t TotalOffs = 0;
   // FIXME: What's a good SWAG number for MaxOffs?
   uint64_t MaxOffs = 2048;
-  MVT VT = TLI.getPointerTy(DL);
+  MVT VT = TLI.getValueType(DL, I->getType()).getSimpleVT();
+
   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
GTI != E; ++GTI) {
 const Value *Idx = GTI.getOperand();
@@ -584,7 +584,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
 
   // N = N + Idx * ElementSize;
   uint64_t ElementSize = GTI.getSequentialElementStride(DL);
-  Register IdxN = getRegForGEPIndex(Idx);
+  Register IdxN = getRegForGEPIndex(VT, Idx);
   if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
 return false;
 
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp 
b/llvm/lib/Target/X86/X86FastISel.cpp
index 2eae155956368f..5d594bd54fbfc4 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -902,6 +902,8 @@ bool X86FastISel::X86SelectAddress(const Value *V, 
X86AddressMode &AM) {
 uint64_t Disp = (int32_t)AM.Disp;
 unsigned IndexReg = AM.IndexReg;
 unsigned Scale = AM.Scale;
+MVT PtrVT = TLI.getValueType(DL, U->getType()).getSimpleVT();
+
 gep_type_iterator GTI = gep_type_begin(U);
 // Iterate through the indices, folding what we can. Constants can be
 // folded, and one dynamic index can be handled, if the scale is supported.
@@ -937,7 +939,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, 
X86AddressMode &AM) {
 (S == 1 || S == 2 || S == 4 || S == 8)) {
   // Scaled-index addressing.
   Scale = S;
-  IndexReg = getRegForGEPIndex(Op);
+  IndexReg = getRegForGEPIndex(PtrVT, Op);
   if (IndexReg == 0)
 return false;
   break;
diff --git a/llvm/test/CodeGen/X86/issue56055.ll 
b/llvm/test/CodeGen/X86/issue56055.ll
new file mode 100644
index 00..27eaf13e3b00be
--- /dev/null
+++ b/llvm/test/CodeGen/X86/issue56055.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -fast-isel < %s | FileCheck -check-prefixes=CHECK,FASTISEL %s
+; RUN: llc < %s | FileCheck -check-prefixes=CHECK,SDAG %s
+
+target datalayout = 
"e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-windows-msvc"
+
+define void @issue56055(ptr addrspace(270) %ptr, ptr %out) {
+; CHECK-LABEL: issue56055:
+; CHECK

[llvm-branch-commits] [llvm] release/19.x: FastISel: Fix incorrectly using getPointerTy (#110465) (PR #110490)

2024-09-30 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/110490
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: FastISel: Fix incorrectly using getPointerTy (#110465) (PR #110490)

2024-09-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: None (llvmbot)


Changes

Backport 81ba95cefe1b5a12f0a7d8e6a383bcce9e95b785

Requested by: @arsenm

---
Full diff: https://github.com/llvm/llvm-project/pull/110490.diff


4 Files Affected:

- (modified) llvm/include/llvm/CodeGen/FastISel.h (+1-1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/FastISel.cpp (+4-4) 
- (modified) llvm/lib/Target/X86/X86FastISel.cpp (+3-1) 
- (added) llvm/test/CodeGen/X86/issue56055.ll (+81) 


``diff
diff --git a/llvm/include/llvm/CodeGen/FastISel.h 
b/llvm/include/llvm/CodeGen/FastISel.h
index 3cbc35400181dd..f3c4cc8d0511d4 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -275,7 +275,7 @@ class FastISel {
 
   /// This is a wrapper around getRegForValue that also takes care of
   /// truncating or sign-extending the given getelementptr index value.
-  Register getRegForGEPIndex(const Value *Idx);
+  Register getRegForGEPIndex(MVT PtrVT, const Value *Idx);
 
   /// We're checking to see if we can fold \p LI into \p FoldInst. Note
   /// that we could have a sequence where multiple LLVM IR instructions are
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp 
b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index ef9f7833551905..246acc7f405837 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -380,14 +380,13 @@ void FastISel::updateValueMap(const Value *I, Register 
Reg, unsigned NumRegs) {
   }
 }
 
-Register FastISel::getRegForGEPIndex(const Value *Idx) {
+Register FastISel::getRegForGEPIndex(MVT PtrVT, const Value *Idx) {
   Register IdxN = getRegForValue(Idx);
   if (!IdxN)
 // Unhandled operand. Halt "fast" selection and bail.
 return Register();
 
   // If the index is smaller or larger than intptr_t, truncate or extend it.
-  MVT PtrVT = TLI.getPointerTy(DL);
   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
   if (IdxVT.bitsLT(PtrVT)) {
 IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
@@ -543,7 +542,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
   uint64_t TotalOffs = 0;
   // FIXME: What's a good SWAG number for MaxOffs?
   uint64_t MaxOffs = 2048;
-  MVT VT = TLI.getPointerTy(DL);
+  MVT VT = TLI.getValueType(DL, I->getType()).getSimpleVT();
+
   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
GTI != E; ++GTI) {
 const Value *Idx = GTI.getOperand();
@@ -584,7 +584,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
 
   // N = N + Idx * ElementSize;
   uint64_t ElementSize = GTI.getSequentialElementStride(DL);
-  Register IdxN = getRegForGEPIndex(Idx);
+  Register IdxN = getRegForGEPIndex(VT, Idx);
   if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
 return false;
 
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp 
b/llvm/lib/Target/X86/X86FastISel.cpp
index 2eae155956368f..5d594bd54fbfc4 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -902,6 +902,8 @@ bool X86FastISel::X86SelectAddress(const Value *V, 
X86AddressMode &AM) {
 uint64_t Disp = (int32_t)AM.Disp;
 unsigned IndexReg = AM.IndexReg;
 unsigned Scale = AM.Scale;
+MVT PtrVT = TLI.getValueType(DL, U->getType()).getSimpleVT();
+
 gep_type_iterator GTI = gep_type_begin(U);
 // Iterate through the indices, folding what we can. Constants can be
 // folded, and one dynamic index can be handled, if the scale is supported.
@@ -937,7 +939,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, 
X86AddressMode &AM) {
 (S == 1 || S == 2 || S == 4 || S == 8)) {
   // Scaled-index addressing.
   Scale = S;
-  IndexReg = getRegForGEPIndex(Op);
+  IndexReg = getRegForGEPIndex(PtrVT, Op);
   if (IndexReg == 0)
 return false;
   break;
diff --git a/llvm/test/CodeGen/X86/issue56055.ll 
b/llvm/test/CodeGen/X86/issue56055.ll
new file mode 100644
index 00..27eaf13e3b00be
--- /dev/null
+++ b/llvm/test/CodeGen/X86/issue56055.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -fast-isel < %s | FileCheck -check-prefixes=CHECK,FASTISEL %s
+; RUN: llc < %s | FileCheck -check-prefixes=CHECK,SDAG %s
+
+target datalayout = 
"e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-windows-msvc"
+
+define void @issue56055(ptr addrspace(270) %ptr, ptr %out) {
+; CHECK-LABEL: issue56055:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:addl $2, %ecx
+; CHECK-NEXT:movl %ecx, (%rdx)
+; CHECK-NEXT:retq
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(270) %ptr, i32 2
+  store ptr addrspace(270) %add.ptr, ptr %out
+  ret void
+}
+
+define void @issue56055_vector(<2 x ptr addrspace(270)> %ptr, ptr %out) {
+; CHECK-LABEL: issue56055_vector:
+; CH

[llvm-branch-commits] [llvm] release/19.x: FastISel: Fix incorrectly using getPointerTy (#110465) (PR #110490)

2024-09-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-selectiondag

Author: None (llvmbot)


Changes

Backport 81ba95cefe1b5a12f0a7d8e6a383bcce9e95b785

Requested by: @arsenm

---
Full diff: https://github.com/llvm/llvm-project/pull/110490.diff


4 Files Affected:

- (modified) llvm/include/llvm/CodeGen/FastISel.h (+1-1) 
- (modified) llvm/lib/CodeGen/SelectionDAG/FastISel.cpp (+4-4) 
- (modified) llvm/lib/Target/X86/X86FastISel.cpp (+3-1) 
- (added) llvm/test/CodeGen/X86/issue56055.ll (+81) 


``diff
diff --git a/llvm/include/llvm/CodeGen/FastISel.h 
b/llvm/include/llvm/CodeGen/FastISel.h
index 3cbc35400181dd..f3c4cc8d0511d4 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -275,7 +275,7 @@ class FastISel {
 
   /// This is a wrapper around getRegForValue that also takes care of
   /// truncating or sign-extending the given getelementptr index value.
-  Register getRegForGEPIndex(const Value *Idx);
+  Register getRegForGEPIndex(MVT PtrVT, const Value *Idx);
 
   /// We're checking to see if we can fold \p LI into \p FoldInst. Note
   /// that we could have a sequence where multiple LLVM IR instructions are
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp 
b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index ef9f7833551905..246acc7f405837 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -380,14 +380,13 @@ void FastISel::updateValueMap(const Value *I, Register 
Reg, unsigned NumRegs) {
   }
 }
 
-Register FastISel::getRegForGEPIndex(const Value *Idx) {
+Register FastISel::getRegForGEPIndex(MVT PtrVT, const Value *Idx) {
   Register IdxN = getRegForValue(Idx);
   if (!IdxN)
 // Unhandled operand. Halt "fast" selection and bail.
 return Register();
 
   // If the index is smaller or larger than intptr_t, truncate or extend it.
-  MVT PtrVT = TLI.getPointerTy(DL);
   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
   if (IdxVT.bitsLT(PtrVT)) {
 IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
@@ -543,7 +542,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
   uint64_t TotalOffs = 0;
   // FIXME: What's a good SWAG number for MaxOffs?
   uint64_t MaxOffs = 2048;
-  MVT VT = TLI.getPointerTy(DL);
+  MVT VT = TLI.getValueType(DL, I->getType()).getSimpleVT();
+
   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
GTI != E; ++GTI) {
 const Value *Idx = GTI.getOperand();
@@ -584,7 +584,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
 
   // N = N + Idx * ElementSize;
   uint64_t ElementSize = GTI.getSequentialElementStride(DL);
-  Register IdxN = getRegForGEPIndex(Idx);
+  Register IdxN = getRegForGEPIndex(VT, Idx);
   if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
 return false;
 
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp 
b/llvm/lib/Target/X86/X86FastISel.cpp
index 2eae155956368f..5d594bd54fbfc4 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -902,6 +902,8 @@ bool X86FastISel::X86SelectAddress(const Value *V, 
X86AddressMode &AM) {
 uint64_t Disp = (int32_t)AM.Disp;
 unsigned IndexReg = AM.IndexReg;
 unsigned Scale = AM.Scale;
+MVT PtrVT = TLI.getValueType(DL, U->getType()).getSimpleVT();
+
 gep_type_iterator GTI = gep_type_begin(U);
 // Iterate through the indices, folding what we can. Constants can be
 // folded, and one dynamic index can be handled, if the scale is supported.
@@ -937,7 +939,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, 
X86AddressMode &AM) {
 (S == 1 || S == 2 || S == 4 || S == 8)) {
   // Scaled-index addressing.
   Scale = S;
-  IndexReg = getRegForGEPIndex(Op);
+  IndexReg = getRegForGEPIndex(PtrVT, Op);
   if (IndexReg == 0)
 return false;
   break;
diff --git a/llvm/test/CodeGen/X86/issue56055.ll 
b/llvm/test/CodeGen/X86/issue56055.ll
new file mode 100644
index 00..27eaf13e3b00be
--- /dev/null
+++ b/llvm/test/CodeGen/X86/issue56055.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -fast-isel < %s | FileCheck -check-prefixes=CHECK,FASTISEL %s
+; RUN: llc < %s | FileCheck -check-prefixes=CHECK,SDAG %s
+
+target datalayout = 
"e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-windows-msvc"
+
+define void @issue56055(ptr addrspace(270) %ptr, ptr %out) {
+; CHECK-LABEL: issue56055:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:addl $2, %ecx
+; CHECK-NEXT:movl %ecx, (%rdx)
+; CHECK-NEXT:retq
+  %add.ptr = getelementptr inbounds i8, ptr addrspace(270) %ptr, i32 2
+  store ptr addrspace(270) %add.ptr, ptr %out
+  ret void
+}
+
+define void @issue56055_vector(<2 x ptr addrspace(270)> %ptr, ptr %out) {
+; CHECK-LABEL: issue56055_vector:

[llvm-branch-commits] [llvm] release/19.x: FastISel: Fix incorrectly using getPointerTy (#110465) (PR #110490)

2024-09-30 Thread via llvm-branch-commits

llvmbot wrote:

@nikic What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/110490
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Adopt new lowering sequence for `fdiv16` (PR #109295)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits


@@ -10616,19 +10616,43 @@ SDValue SITargetLowering::LowerFDIV16(SDValue Op, 
SelectionDAG &DAG) const {
 return FastLowered;
 
   SDLoc SL(Op);
-  SDValue Src0 = Op.getOperand(0);
-  SDValue Src1 = Op.getOperand(1);
-
-  SDValue CvtSrc0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
-  SDValue CvtSrc1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
-
-  SDValue RcpSrc1 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, CvtSrc1);
-  SDValue Quot = DAG.getNode(ISD::FMUL, SL, MVT::f32, CvtSrc0, RcpSrc1);
-
-  SDValue FPRoundFlag = DAG.getTargetConstant(0, SL, MVT::i32);
-  SDValue BestQuot = DAG.getNode(ISD::FP_ROUND, SL, MVT::f16, Quot, 
FPRoundFlag);
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
 
-  return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f16, BestQuot, Src1, Src0);
+  // a32.u = opx(V_CVT_F32_F16, a.u); // CVT to F32
+  // b32.u = opx(V_CVT_F32_F16, b.u); // CVT to F32
+  // r32.u = opx(V_RCP_F32, b32.u); // rcp = 1 / d
+  // q32.u = opx(V_MUL_F32, a32.u, r32.u); // q = n * rcp
+  // e32.u = opx(V_MAD_F32, (b32.u^_neg32), q32.u, a32.u); // err = -d * q + n
+  // q32.u = opx(V_MAD_F32, e32.u, r32.u, q32.u); // q = n * rcp
+  // e32.u = opx(V_MAD_F32, (b32.u^_neg32), q32.u, a32.u); // err = -d * q + n
+  // tmp.u = opx(V_MUL_F32, e32.u, r32.u);
+  // tmp.u = opx(V_AND_B32, tmp.u, 0xff80)
+  // q32.u = opx(V_ADD_F32, tmp.u, q32.u);
+  // q16.u = opx(V_CVT_F16_F32, q32.u);
+  // q16.u = opx(V_DIV_FIXUP_F16, q16.u, b.u, a.u); // q = touchup(q, d, n)
+
+  // We will use ISD::FMA on targets that don't support ISD::FMAD.
+  unsigned FMADOpCode =
+  isOperationLegal(ISD::FMAD, MVT::f32) ? ISD::FMAD : ISD::FMA;
+
+  SDValue LHSExt = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, LHS);
+  SDValue RHSExt = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, RHS);

arsenm wrote:

Didn't propagate flags 

https://github.com/llvm/llvm-project/pull/109295
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Adopt new lowering sequence for `fdiv16` (PR #109295)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits


@@ -4903,16 +4903,40 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr 
&MI,
   LLT S16 = LLT::scalar(16);
   LLT S32 = LLT::scalar(32);
 
+  // a32.u = opx(V_CVT_F32_F16, a.u); // CVT to F32
+  // b32.u = opx(V_CVT_F32_F16, b.u); // CVT to F32
+  // r32.u = opx(V_RCP_F32, b32.u); // rcp = 1 / d
+  // q32.u = opx(V_MUL_F32, a32.u, r32.u); // q = n * rcp
+  // e32.u = opx(V_MAD_F32, (b32.u^_neg32), q32.u, a32.u); // err = -d * q + n
+  // q32.u = opx(V_MAD_F32, e32.u, r32.u, q32.u); // q = n * rcp
+  // e32.u = opx(V_MAD_F32, (b32.u^_neg32), q32.u, a32.u); // err = -d * q + n
+  // tmp.u = opx(V_MUL_F32, e32.u, r32.u);
+  // tmp.u = opx(V_AND_B32, tmp.u, 0xff80)
+  // q32.u = opx(V_ADD_F32, tmp.u, q32.u);
+  // q16.u = opx(V_CVT_F16_F32, q32.u);
+  // q16.u = opx(V_DIV_FIXUP_F16, q16.u, b.u, a.u); // q = touchup(q, d, n)
+
   auto LHSExt = B.buildFPExt(S32, LHS, Flags);
   auto RHSExt = B.buildFPExt(S32, RHS, Flags);
-
-  auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
+  auto NegRHSExt = B.buildFNeg(S32, RHSExt);
+  auto Rcp = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32})
  .addUse(RHSExt.getReg(0))
  .setMIFlags(Flags);
-
-  auto QUOT = B.buildFMul(S32, LHSExt, RCP, Flags);
-  auto RDst = B.buildFPTrunc(S16, QUOT, Flags);
-
+  auto Quot = B.buildFMul(S32, LHSExt, Rcp);

arsenm wrote:

Lost flags after this point 

https://github.com/llvm/llvm-project/pull/109295
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [mlir] [MLIR][OpenMP] Use map format to represent use_device_{addr, ptr} (PR #109810)

2024-09-30 Thread Sergio Afonso via llvm-branch-commits

skatrak wrote:

Ping for review!

https://github.com/llvm/llvm-project/pull/109810
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add tests for SIPreAllocateWWMRegs (PR #109963)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits


@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs 
-run-pass=si-pre-allocate-wwm-regs -o - -mcpu=tahiti %s  | FileCheck %s
+
+---
+
+name: pre_allocate_wwm_regs_strict
+tracksRegLiveness: true
+body: |

arsenm wrote:

WWMSpills, SpillVGPRs, SpillPhysVGPRs, SGPRSpillsTo*

https://github.com/llvm/llvm-project/pull/109963
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Generalize the instruction size checking in AsmPrinter (PR #110108)

2024-09-30 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/110108

>From 92eb911fcd781825fa88aaec6c05b9484f49d158 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Wed, 25 Sep 2024 16:16:29 +0300
Subject: [PATCH] [AArch64] Generalize the instruction size checking in
 AsmPrinter

Most of PAuth-related code counts the instructions being inserted and
asserts that no more bytes are emitted than the size returned by the
getInstSizeInBytes(MI) method. This check seems useful not only for
PAuth-related instructions. Also, reimplementing it globally in
AArch64AsmPrinter makes it more robust and simplifies further
refactoring of PAuth-related code.
---
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 121 +++---
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   1 +
 2 files changed, 44 insertions(+), 78 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp 
b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 47dd32ad2adc2f..c6ee8d43bd8f2d 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -24,6 +24,7 @@
 #include "MCTargetDesc/AArch64TargetStreamer.h"
 #include "TargetInfo/AArch64TargetInfo.h"
 #include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -86,6 +87,9 @@ class AArch64AsmPrinter : public AsmPrinter {
   FaultMaps FM;
   const AArch64Subtarget *STI;
   bool ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = false;
+#ifndef NDEBUG
+  unsigned InstsEmitted;
+#endif
 
 public:
   AArch64AsmPrinter(TargetMachine &TM, std::unique_ptr Streamer)
@@ -150,8 +154,7 @@ class AArch64AsmPrinter : public AsmPrinter {
   void emitPtrauthAuthResign(const MachineInstr *MI);
 
   // Emit the sequence to compute a discriminator into x17, or reuse AddrDisc.
-  unsigned emitPtrauthDiscriminator(uint16_t Disc, unsigned AddrDisc,
-unsigned &InstsEmitted);
+  unsigned emitPtrauthDiscriminator(uint16_t Disc, unsigned AddrDisc);
 
   // Emit the sequence for LOADauthptrstatic
   void LowerLOADauthptrstatic(const MachineInstr &MI);
@@ -1338,8 +1341,6 @@ void 
AArch64AsmPrinter::LowerJumpTableDest(llvm::MCStreamer &OutStreamer,
 }
 
 void AArch64AsmPrinter::LowerHardenedBRJumpTable(const MachineInstr &MI) {
-  unsigned InstsEmitted = 0;
-
   const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
   assert(MJTI && "Can't lower jump-table dispatch without JTI");
 
@@ -1377,10 +1378,8 @@ void AArch64AsmPrinter::LowerHardenedBRJumpTable(const 
MachineInstr &MI) {
  .addReg(AArch64::X16)
  .addImm(MaxTableEntry)
  .addImm(0));
-++InstsEmitted;
   } else {
 emitMOVZ(AArch64::X17, static_cast(MaxTableEntry), 0);
-++InstsEmitted;
 // It's sad that we have to manually materialize instructions, but we can't
 // trivially reuse the main pseudo expansion logic.
 // A MOVK sequence is easy enough to generate and handles the general case.
@@ -1389,14 +1388,12 @@ void AArch64AsmPrinter::LowerHardenedBRJumpTable(const 
MachineInstr &MI) {
 break;
   emitMOVK(AArch64::X17, static_cast(MaxTableEntry >> Offset),
Offset);
-  ++InstsEmitted;
 }
 EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXrs)
  .addReg(AArch64::XZR)
  .addReg(AArch64::X16)
  .addReg(AArch64::X17)
  .addImm(0));
-++InstsEmitted;
   }
 
   // This picks entry #0 on failure.
@@ -1406,7 +1403,6 @@ void AArch64AsmPrinter::LowerHardenedBRJumpTable(const 
MachineInstr &MI) {
.addReg(AArch64::X16)
.addReg(AArch64::XZR)
.addImm(AArch64CC::LS));
-  ++InstsEmitted;
 
   // Prepare the @PAGE/@PAGEOFF low/high operands.
   MachineOperand JTMOHi(JTOp), JTMOLo(JTOp);
@@ -1421,14 +1417,12 @@ void AArch64AsmPrinter::LowerHardenedBRJumpTable(const 
MachineInstr &MI) {
   EmitToStreamer(
   *OutStreamer,
   MCInstBuilder(AArch64::ADRP).addReg(AArch64::X17).addOperand(JTMCHi));
-  ++InstsEmitted;
 
   EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXri)
.addReg(AArch64::X17)
.addReg(AArch64::X17)
.addOperand(JTMCLo)
.addImm(0));
-  ++InstsEmitted;
 
   EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
.addReg(AArch64::X16)
@@ -1436,7 +1430,6 @@ void AArch64AsmPrinter::LowerHardenedBRJumpTable(const 
MachineInstr &MI) {
.addReg(AArch64::X16)

[llvm-branch-commits] [mlir] [MLIR][OpenMP] Improve omp.section block arguments handling (PR #110266)

2024-09-30 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah approved this pull request.

Thanks!

https://github.com/llvm/llvm-project/pull/110266
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [Flang][OpenMP] Improve entry block argument creation and binding (PR #110267)

2024-09-30 Thread Sergio Afonso via llvm-branch-commits

https://github.com/skatrak updated 
https://github.com/llvm/llvm-project/pull/110267

>From 2c5d74d932797b916b5f0da6fb017b5f4af2b2b4 Mon Sep 17 00:00:00 2001
From: Sergio Afonso 
Date: Fri, 27 Sep 2024 13:51:27 +0100
Subject: [PATCH] [Flang][OpenMP] Improve entry block argument creation and
 binding

The main purpose of this patch is to centralize the logic for creating MLIR
operation entry blocks and for binding them to the corresponding symbols. This
minimizes the chances of mixing arguments up for operations having multiple
entry block argument-generating clauses and prevents divergence while binding
arguments.

Some changes implemented to this end are:
- Split into two functions the creation of the entry block, and the binding of
its arguments and the corresponding Fortran symbol. This enabled a significant
simplification of the lowering of composite constructs, where it's no longer
necessary to manually ensure the lists of arguments and symbols refer to the
same variables in the same order and also match the expected order by the
`BlockArgOpenMPOpInterface`.
- Removed redundant and error-prone passing of types and locations from
`ClauseProcessor` methods. Instead, these are obtained from the values in the
appropriate clause operands structure. This also simplifies argument lists of
several lowering functions.
- Access block arguments of already created MLIR operations through the
`BlockArgOpenMPOpInterface` instead of directly indexing the argument list of
the operation, which is not scalable as more entry block argument-generating
clauses are added to an operation.
- Simplified the implementation of `genParallelOp` to no longer need to define
different callbacks depending on whether delayed privatization is enabled.
---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp|   79 +-
 flang/lib/Lower/OpenMP/ClauseProcessor.h  |   38 +-
 flang/lib/Lower/OpenMP/OpenMP.cpp | 1016 +
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |5 +-
 flang/lib/Lower/OpenMP/ReductionProcessor.h   |3 +-
 flang/lib/Lower/OpenMP/Utils.cpp  |9 +-
 flang/lib/Lower/OpenMP/Utils.h|4 +-
 7 files changed, 554 insertions(+), 600 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp 
b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index e9ef8579100e93..44f5ca7f342707 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -166,15 +166,11 @@ getIfClauseOperand(lower::AbstractConverter &converter,
 static void addUseDeviceClause(
 lower::AbstractConverter &converter, const omp::ObjectList &objects,
 llvm::SmallVectorImpl &operands,
-llvm::SmallVectorImpl &useDeviceTypes,
-llvm::SmallVectorImpl &useDeviceLocs,
 llvm::SmallVectorImpl &useDeviceSyms) {
   genObjectList(objects, converter, operands);
-  for (mlir::Value &operand : operands) {
+  for (mlir::Value &operand : operands)
 checkMapType(operand.getLoc(), operand.getType());
-useDeviceTypes.push_back(operand.getType());
-useDeviceLocs.push_back(operand.getLoc());
-  }
+
   for (const omp::Object &object : objects)
 useDeviceSyms.push_back(object.sym());
 }
@@ -832,14 +828,12 @@ bool 
ClauseProcessor::processDepend(mlir::omp::DependClauseOps &result) const {
 
 bool ClauseProcessor::processHasDeviceAddr(
 mlir::omp::HasDeviceAddrClauseOps &result,
-llvm::SmallVectorImpl &isDeviceTypes,
-llvm::SmallVectorImpl &isDeviceLocs,
-llvm::SmallVectorImpl &isDeviceSymbols) const {
+llvm::SmallVectorImpl &isDeviceSyms) const {
   return findRepeatableClause(
   [&](const omp::clause::HasDeviceAddr &devAddrClause,
   const parser::CharBlock &) {
 addUseDeviceClause(converter, devAddrClause.v, 
result.hasDeviceAddrVars,
-   isDeviceTypes, isDeviceLocs, isDeviceSymbols);
+   isDeviceSyms);
   });
 }
 
@@ -864,14 +858,12 @@ bool ClauseProcessor::processIf(
 
 bool ClauseProcessor::processIsDevicePtr(
 mlir::omp::IsDevicePtrClauseOps &result,
-llvm::SmallVectorImpl &isDeviceTypes,
-llvm::SmallVectorImpl &isDeviceLocs,
-llvm::SmallVectorImpl &isDeviceSymbols) const {
+llvm::SmallVectorImpl &isDeviceSyms) const {
   return findRepeatableClause(
   [&](const omp::clause::IsDevicePtr &devPtrClause,
   const parser::CharBlock &) {
 addUseDeviceClause(converter, devPtrClause.v, result.isDevicePtrVars,
-   isDeviceTypes, isDeviceLocs, isDeviceSymbols);
+   isDeviceSyms);
   });
 }
 
@@ -892,9 +884,7 @@ void ClauseProcessor::processMapObjects(
 std::map> &parentMemberIndices,
 llvm::SmallVectorImpl &mapVars,
-llvm::SmallVectorImpl *mapSyms,
-llvm::SmallVectorImpl *mapSymLocs,
-llvm::SmallVectorImpl *mapSymTypes) const {
+llvm::SmallVectorImpl &mapSyms) const {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
   for

[llvm-branch-commits] [llvm] [AMDGPU] Serialize WWM_REG vreg flag (PR #110229)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits


@@ -3614,3 +3614,14 @@ SIRegisterInfo::getSubRegAlignmentNumBits(const 
TargetRegisterClass *RC,
   }
   return 0;
 }
+
+SmallVector

arsenm wrote:

probably should just be const char*, this will probably only ever be used with 
literals 

https://github.com/llvm/llvm-project/pull/110229
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Serialize WWM_REG vreg flag (PR #110229)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits


@@ -0,0 +1,16 @@
+# RUN: llc -mtriple=amdgcn -run-pass=none -o - %s | FileCheck %s
+# This test ensures that the MIR parser parses virtual register flags correctly
+
+---
+name: vregs
+# CHECK: registers:
+# CHECK-NEXT:   - { id: 0, class: vgpr_32, preferred-register: '$vgpr1', 
flags: [ WWM_REG ] }
+# CHECK-NEXT:   - { id: 1, class: sgpr_64, preferred-register: '$sgpr0_sgpr1', 
flags: [  ] }
+# CHECK-NEXT:   - { id: 2, class: sgpr_64, preferred-register: '', flags: [  ] 
}
+registers:
+  - { id: 0, class: vgpr_32, preferred-register: $vgpr1, flags: [ WWM_REG ]}
+  - { id: 1, class: sgpr_64, preferred-register: $sgpr0_sgpr1 }

arsenm wrote:

Also test an explicitly empty case 

https://github.com/llvm/llvm-project/pull/110229
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Serialize WWM_REG vreg flag (PR #110229)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits


@@ -1628,6 +1628,21 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
 MFI->reserveWWMRegister(ParsedReg);
   }
 
+  auto setRegisterFlags = [&](const VRegInfo &Info) {
+for (const auto &Flag : Info.Flags) {
+  MFI->setFlag(Info.VReg, Flag);
+}
+  };
+
+  for (const auto &P : PFS.VRegInfosNamed) {
+const VRegInfo &Info = *P.second;
+setRegisterFlags(Info);
+  }
+  for (const auto &P : PFS.VRegInfos) {
+const VRegInfo &Info = *P.second;

arsenm wrote:

c++17 destructuring

https://github.com/llvm/llvm-project/pull/110229
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Serialize WWM_REG vreg flag (PR #110229)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits


@@ -1628,6 +1628,21 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
 MFI->reserveWWMRegister(ParsedReg);
   }
 
+  auto setRegisterFlags = [&](const VRegInfo &Info) {
+for (const auto &Flag : Info.Flags) {
+  MFI->setFlag(Info.VReg, Flag);
+}
+  };
+
+  for (const auto &P : PFS.VRegInfosNamed) {
+const VRegInfo &Info = *P.second;

arsenm wrote:

c++17 destructuring 

https://github.com/llvm/llvm-project/pull/110229
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Serialize WWM_REG vreg flag (PR #110229)

2024-09-30 Thread Matt Arsenault via llvm-branch-commits


@@ -1628,6 +1628,21 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
 MFI->reserveWWMRegister(ParsedReg);
   }
 
+  auto setRegisterFlags = [&](const VRegInfo &Info) {
+for (const auto &Flag : Info.Flags) {

arsenm wrote:

No auto, no reference. This is just uint8_t 

https://github.com/llvm/llvm-project/pull/110229
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Generalize the instruction size checking in AsmPrinter (PR #110108)

2024-09-30 Thread Anatoly Trosinenko via llvm-branch-commits

atrosinenko wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/110108?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#110108** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/110108?utm_source=stack-comment-icon) 👈
* **#110107** [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/110107?utm_source=stack-comment-icon)
* `main`

This stack of pull requests is managed by Graphite. [Learn more about stacking](https://stacking.dev/?utm_source=stack-comment).


 Join @atrosinenko and the rest of your teammates on [Graphite](https://graphite.dev?utm-source=stack-comment).
  

https://github.com/llvm/llvm-project/pull/110108
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][Transforms][NFC] Dialect Conversion: Simplify `finalize` signature (PR #110419)

2024-09-30 Thread Markus Böck via llvm-branch-commits

https://github.com/zero9178 approved this pull request.


https://github.com/llvm/llvm-project/pull/110419
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT] Support --show-density for fdata and YAML profiles (PR #110567)

2024-09-30 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov created 
https://github.com/llvm/llvm-project/pull/110567

None


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Document entry block argument-defining clauses (NFC) (PR #109811)

2024-09-30 Thread Pranav Bhandarkar via llvm-branch-commits


@@ -285,7 +285,75 @@ argument's type:
   specific `mlir::Attribute` subclass) will be used instead.
   - Other attribute types will be represented with their `storageType`.
 - It will create `Operands` structure for each operation, which is an
-empty structure subclassing all operand structures defined for the 
corresponding `OpenMP_Op`'s clauses.
+empty structure subclassing all operand structures defined for the 
corresponding
+`OpenMP_Op`'s clauses.
+
+### Entry Block Argument-Defining Clauses
+
+Certain OpenMP clauses introduce in their MLIR representation mappings between
+outside values and entry block arguments for the region of the MLIR operation
+they are applied to. This enables, for example, the introduction of private

bhandarkar-pranav wrote:

Please consider
```
This enables, for example, the introduction of private copies of the same 
underlying variable defined outside the MLIR operation the clause is attached 
to.
```

https://github.com/llvm/llvm-project/pull/109811
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Document entry block argument-defining clauses (NFC) (PR #109811)

2024-09-30 Thread Pranav Bhandarkar via llvm-branch-commits


@@ -285,7 +285,75 @@ argument's type:
   specific `mlir::Attribute` subclass) will be used instead.
   - Other attribute types will be represented with their `storageType`.
 - It will create `Operands` structure for each operation, which is an
-empty structure subclassing all operand structures defined for the 
corresponding `OpenMP_Op`'s clauses.
+empty structure subclassing all operand structures defined for the 
corresponding
+`OpenMP_Op`'s clauses.
+
+### Entry Block Argument-Defining Clauses
+
+Certain OpenMP clauses introduce in their MLIR representation mappings between
+outside values and entry block arguments for the region of the MLIR operation
+they are applied to. This enables, for example, the introduction of private
+copies of the same underlying variable. Currently, clauses with this property
+can be classified in three main categories:
+  - Map-like clauses: `map`, `use_device_addr` and `use_device_ptr`.
+  - Reduction-like clauses: `in_reduction`, `reduction` and `task_reduction`.
+  - Privatization clause: `private`.
+
+All three kinds of entry block argument-defining clauses use a similar custom
+assembly format representation, only differing based on the different pieces of
+information attached to each kind. Below, one example of each is shown:
+
+```mlir
+omp.target map_entries(%x -> %x.m, %y -> %y.m : !llvm.ptr, !llvm.ptr) {
+  // Use %x.m, %y.m in place of %x and %y...
+}
+
+omp.wsloop reduction(@add.i32 %x -> %x.r, byref @add.f32 %y -> %y.r : 
!llvm.ptr, !llvm.ptr) {
+  // Use %x.r, %y.r in place of %x and %y...
+}
+
+omp.parallel private(@x.privatizer %x -> %x.p, @y.privatizer %y -> %y.p : 
!llvm.ptr, !llvm.ptr) {
+  // Use %x.p, %y.p in place of %x and %y...
+}
+```
+
+As a consequence of parsing and printing the operation's first region entry
+block argument names together with the custom assembly format of these clauses,
+entry block arguments (i.e. the `^bb0(...):` line) must not be explicitly
+defined for these operations. Additionally, it is not possible to implement 
this
+feature while allowing each clause to be independently parsed and printed,
+because they need to be printed/parsed together with the corresponding
+operation's first region. They must have a well-defined ordering in which
+multiple of these clauses are specified for a given operation, as well.
+
+The parsing/printing of these clauses together with the region provides the
+ability to define entry block arguments directly after the `->`. Forcing a
+specific ordering between these clauses makes the block argument ordering
+well-defined, which is the property used to easily match each clause with the
+entry block arguments defined by it.
+
+Custom printers and parsers for operation regions based on the entry block
+argument-defining clauses they take are implemented based on the
+`{parse,print}BlockArgRegion` functions, which take care of the sorting and
+formatting of each kind of clause, minimizing code duplication resulting from
+this approach. One example of the custom assembly format of an operation taking
+the `private` and `reduction` clauses is the following:
+
+```tablegen
+let assemblyFormat = clausesAssemblyFormat # [{
+  custom($region, $private_vars, type($private_vars),
+  $private_syms, $reduction_vars, type($reduction_vars), $reduction_byref,
+  $reduction_syms) attr-dict
+}];
+```
+
+The `BlockArgOpenMPOpInterface` has been introduced to simplify the addition 
and
+handling of these kinds of clauses. It holds `numBlockArgs()`
+functions that by default return 0, to be overridden by each clause through the
+`extraClassDeclaration` property. Based on these functions and the expected
+alphabetical sorting between entry block argument-defining clauses, it

bhandarkar-pranav wrote:

I am assuming the tablegen backend for openmp that you have implemented doesn't 
do the sorting and the onus for the alphabetical sorting is on the user, 
correct? If that's the case I think that expectation must be made explicit, 
either here or (preferably) in [Adding an 
Operation](https://mlir.llvm.org/docs/Dialects/OpenMPDialect/#adding-an-operation)


https://github.com/llvm/llvm-project/pull/109811
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Document entry block argument-defining clauses (NFC) (PR #109811)

2024-09-30 Thread Pranav Bhandarkar via llvm-branch-commits

https://github.com/bhandarkar-pranav edited 
https://github.com/llvm/llvm-project/pull/109811
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Document entry block argument-defining clauses (NFC) (PR #109811)

2024-09-30 Thread Pranav Bhandarkar via llvm-branch-commits

https://github.com/bhandarkar-pranav commented:

LGTM. Given how reviewing docs essentially turns into a series of subjective 
opinions or preferences, please consider most all of my comments as nits, 
except the one about alphabetical sorting of clauses.

https://github.com/llvm/llvm-project/pull/109811
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Document entry block argument-defining clauses (NFC) (PR #109811)

2024-09-30 Thread Pranav Bhandarkar via llvm-branch-commits


@@ -285,7 +285,75 @@ argument's type:
   specific `mlir::Attribute` subclass) will be used instead.
   - Other attribute types will be represented with their `storageType`.
 - It will create `Operands` structure for each operation, which is an
-empty structure subclassing all operand structures defined for the 
corresponding `OpenMP_Op`'s clauses.
+empty structure subclassing all operand structures defined for the 
corresponding
+`OpenMP_Op`'s clauses.
+
+### Entry Block Argument-Defining Clauses
+
+Certain OpenMP clauses introduce in their MLIR representation mappings between

bhandarkar-pranav wrote:

Could you consider the following reordering and 
slight rewording of the first sentence?
```
In their MLIR representation, certain OpenMP clauses introduce a mapping 
between values defined outside the operation they are applied to and entry 
block arguments for the region of that MLIR operation.
```

https://github.com/llvm/llvm-project/pull/109811
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Document entry block argument-defining clauses (NFC) (PR #109811)

2024-09-30 Thread Pranav Bhandarkar via llvm-branch-commits


@@ -285,7 +285,75 @@ argument's type:
   specific `mlir::Attribute` subclass) will be used instead.
   - Other attribute types will be represented with their `storageType`.
 - It will create `Operands` structure for each operation, which is an
-empty structure subclassing all operand structures defined for the 
corresponding `OpenMP_Op`'s clauses.
+empty structure subclassing all operand structures defined for the 
corresponding
+`OpenMP_Op`'s clauses.
+
+### Entry Block Argument-Defining Clauses
+
+Certain OpenMP clauses introduce in their MLIR representation mappings between
+outside values and entry block arguments for the region of the MLIR operation
+they are applied to. This enables, for example, the introduction of private
+copies of the same underlying variable. Currently, clauses with this property
+can be classified in three main categories:

bhandarkar-pranav wrote:

`s/in three/into three`

https://github.com/llvm/llvm-project/pull/109811
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Document entry block argument-defining clauses (NFC) (PR #109811)

2024-09-30 Thread Pranav Bhandarkar via llvm-branch-commits


@@ -285,7 +285,75 @@ argument's type:
   specific `mlir::Attribute` subclass) will be used instead.
   - Other attribute types will be represented with their `storageType`.
 - It will create `Operands` structure for each operation, which is an
-empty structure subclassing all operand structures defined for the 
corresponding `OpenMP_Op`'s clauses.
+empty structure subclassing all operand structures defined for the 
corresponding
+`OpenMP_Op`'s clauses.
+
+### Entry Block Argument-Defining Clauses
+
+Certain OpenMP clauses introduce in their MLIR representation mappings between
+outside values and entry block arguments for the region of the MLIR operation
+they are applied to. This enables, for example, the introduction of private
+copies of the same underlying variable. Currently, clauses with this property
+can be classified in three main categories:
+  - Map-like clauses: `map`, `use_device_addr` and `use_device_ptr`.
+  - Reduction-like clauses: `in_reduction`, `reduction` and `task_reduction`.
+  - Privatization clause: `private`.

bhandarkar-pranav wrote:

Ultra Nit: I think it should be `Privatization clauses:` even if the set has a 
solitary element.

https://github.com/llvm/llvm-project/pull/109811
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] a7554df - [LoongArch][ISel] Check the number of sign bits in `PatGprGpr_32` (#107432)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Yingwei Zheng
Date: 2024-10-01T08:51:27+02:00
New Revision: a7554dfc222b13624426ebd6ef46e122b9c16ee7

URL: 
https://github.com/llvm/llvm-project/commit/a7554dfc222b13624426ebd6ef46e122b9c16ee7
DIFF: 
https://github.com/llvm/llvm-project/commit/a7554dfc222b13624426ebd6ef46e122b9c16ee7.diff

LOG:  [LoongArch][ISel] Check the number of sign bits in `PatGprGpr_32` 
(#107432)

After https://github.com/llvm/llvm-project/pull/92205, LoongArch ISel
selects `div.w` for `trunc i64 (sdiv i64 3202030857, (sext i32 X to
i64)) to i32`. It is incorrect since `3202030857` is not a signed 32-bit
constant. It will produce wrong result when `X == 2`:
https://alive2.llvm.org/ce/z/pzfGZZ

This patch adds additional `sexti32` checks to operands of
`PatGprGpr_32`.
Alive2 proof: https://alive2.llvm.org/ce/z/AkH5Mp

Fix #107414.

(cherry picked from commit a111f9119a5ec77c19a514ec09454218f739454f)

Added: 


Modified: 
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll

Removed: 




diff  --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ef647a42778737..339d50bd819217 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x6800>;
 
 /// Generic pattern classes
 
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+  return cast(N->getOperand(1))->getVT().bitsLE(MVT::i32);
+}]>;
 class PatGprGpr
 : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
 class PatGprGpr_32
-: Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, 
GPR:$rk)>;
+: Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 
GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>;
 class PatGpr
 : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
 

diff  --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index ab3eec240db3c1..c22acdb4969071 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,7 +191,8 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:   # %bb.0: # %entry
 ; LA64-NEXT:addi.w $a1, $a1, 0
 ; LA64-NEXT:addi.w $a0, $a0, 0
-; LA64-NEXT:div.w $a0, $a0, $a1
+; LA64-NEXT:div.d $a0, $a0, $a1
+; LA64-NEXT:addi.w $a0, $a0, 0
 ; LA64-NEXT:ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -207,11 +208,12 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:   # %bb.0: # %entry
 ; LA64-TRAP-NEXT:addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:div.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:div.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
+; LA64-TRAP-NEXT:addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:ret
 entry:
   %r = sdiv i32 %a, %b
@@ -1151,3 +1153,64 @@ entry:
   %r = urem i64 %a, %b
   ret i64 %r
 }
+
+define signext i32 @pr107414(i32 signext %x) {
+; LA32-LABEL: pr107414:
+; LA32:   # %bb.0: # %entry
+; LA32-NEXT:addi.w $sp, $sp, -16
+; LA32-NEXT:.cfi_def_cfa_offset 16
+; LA32-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:.cfi_offset 1, -4
+; LA32-NEXT:move $a2, $a0
+; LA32-NEXT:srai.w $a3, $a0, 31
+; LA32-NEXT:lu12i.w $a0, -266831
+; LA32-NEXT:ori $a0, $a0, 3337
+; LA32-NEXT:move $a1, $zero
+; LA32-NEXT:bl %plt(__divdi3)
+; LA32-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:addi.w $sp, $sp, 16
+; LA32-NEXT:ret
+;
+; LA64-LABEL: pr107414:
+; LA64:   # %bb.0: # %entry
+; LA64-NEXT:lu12i.w $a1, -266831
+; LA64-NEXT:ori $a1, $a1, 3337
+; LA64-NEXT:lu32i.d $a1, 0
+; LA64-NEXT:div.d $a0, $a1, $a0
+; LA64-NEXT:addi.w $a0, $a0, 0
+; LA64-NEXT:ret
+;
+; LA32-TRAP-LABEL: pr107414:
+; LA32-TRAP:   # %bb.0: # %entry
+; LA32-TRAP-NEXT:addi.w $sp, $sp, -16
+; LA32-TRAP-NEXT:.cfi_def_cfa_offset 16
+; LA32-TRAP-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-TRAP-NEXT:.cfi_offset 1, -4
+; LA32-TRAP-NEXT:move $a2, $a0
+; LA32-TRAP-NEXT:srai.w $a3, $a0, 31
+; LA32-TRAP-NEXT:lu12i.w $a0, -266831
+; LA32-TRAP-NEXT:ori $a0, $a0, 3337
+; LA32-TRAP-NEXT:move $a1, $zero
+; LA32-TRAP-NEXT:bl %plt(__divdi3)
+; LA32-TRAP-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-TRAP-NEXT:addi.w $sp, $sp, 16
+; LA32-TRAP-NEXT:ret
+;
+; LA64-TRAP-LABEL: pr107414:
+; LA64-TRAP:   # %bb.0: # %entry
+; LA64-TRAP-NEXT:lu12i.w $a1, -266831
+; LA64-TRAP-NEXT:ori $a1, $a1, 3337
+; LA64-TRAP-NEXT:lu32i.d $a1, 0
+; LA64-TRAP

[llvm-branch-commits] [llvm] 9905852 - [LoongArch] Eliminate the redundant sign extension of division (#107971)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: hev
Date: 2024-10-01T08:51:27+02:00
New Revision: 99058521d4c80635f60b2c1442b683395e0ee818

URL: 
https://github.com/llvm/llvm-project/commit/99058521d4c80635f60b2c1442b683395e0ee818
DIFF: 
https://github.com/llvm/llvm-project/commit/99058521d4c80635f60b2c1442b683395e0ee818.diff

LOG: [LoongArch] Eliminate the redundant sign extension of division (#107971)

If all incoming values of `div.d` are sign-extended and all users only
use the lower 32 bits, then convert them to W versions.

Fixes: #107946
(cherry picked from commit 0f47e3aebdd2a4a938468a272ea4224552dbf176)

Added: 


Modified: 
llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll

Removed: 




diff  --git a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp 
b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
index abac69054f3b91..ab90409fdf47d0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
@@ -637,6 +637,19 @@ static bool isSignExtendedW(Register SrcReg, const 
LoongArchSubtarget &ST,
 break;
   }
   return false;
+// If all incoming values are sign-extended and all users only use
+// the lower 32 bits, then convert them to W versions.
+case LoongArch::DIV_D: {
+  if (!AddRegToWorkList(MI->getOperand(1).getReg()))
+return false;
+  if (!AddRegToWorkList(MI->getOperand(2).getReg()))
+return false;
+  if (hasAllWUsers(*MI, ST, MRI)) {
+FixableDef.insert(MI);
+break;
+  }
+  return false;
+}
 }
   }
 
@@ -651,6 +664,8 @@ static unsigned getWOp(unsigned Opcode) {
 return LoongArch::ADDI_W;
   case LoongArch::ADD_D:
 return LoongArch::ADD_W;
+  case LoongArch::DIV_D:
+return LoongArch::DIV_W;
   case LoongArch::LD_D:
   case LoongArch::LD_WU:
 return LoongArch::LD_W;

diff  --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index c22acdb4969071..c5af79157eaadc 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,8 +191,7 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:   # %bb.0: # %entry
 ; LA64-NEXT:addi.w $a1, $a1, 0
 ; LA64-NEXT:addi.w $a0, $a0, 0
-; LA64-NEXT:div.d $a0, $a0, $a1
-; LA64-NEXT:addi.w $a0, $a0, 0
+; LA64-NEXT:div.w $a0, $a0, $a1
 ; LA64-NEXT:ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -208,12 +207,11 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:   # %bb.0: # %entry
 ; LA64-TRAP-NEXT:addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:div.d $a0, $a0, $a1
+; LA64-TRAP-NEXT:div.w $a0, $a0, $a1
 ; LA64-TRAP-NEXT:bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
-; LA64-TRAP-NEXT:addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:ret
 entry:
   %r = sdiv i32 %a, %b



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109125

>From a7554dfc222b13624426ebd6ef46e122b9c16ee7 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng 
Date: Tue, 10 Sep 2024 09:19:39 +0800
Subject: [PATCH 1/2]  [LoongArch][ISel] Check the number of sign bits in
 `PatGprGpr_32` (#107432)

After https://github.com/llvm/llvm-project/pull/92205, LoongArch ISel
selects `div.w` for `trunc i64 (sdiv i64 3202030857, (sext i32 X to
i64)) to i32`. It is incorrect since `3202030857` is not a signed 32-bit
constant. It will produce wrong result when `X == 2`:
https://alive2.llvm.org/ce/z/pzfGZZ

This patch adds additional `sexti32` checks to operands of
`PatGprGpr_32`.
Alive2 proof: https://alive2.llvm.org/ce/z/AkH5Mp

Fix #107414.

(cherry picked from commit a111f9119a5ec77c19a514ec09454218f739454f)
---
 .../Target/LoongArch/LoongArchInstrInfo.td|  5 +-
 .../ir-instruction/sdiv-udiv-srem-urem.ll | 67 ++-
 2 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ef647a42778737..339d50bd819217 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x6800>;
 
 /// Generic pattern classes
 
+def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+  return cast(N->getOperand(1))->getVT().bitsLE(MVT::i32);
+}]>;
 class PatGprGpr
 : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
 class PatGprGpr_32
-: Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, 
GPR:$rk)>;
+: Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 
GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>;
 class PatGpr
 : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
 
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
index ab3eec240db3c1..c22acdb4969071 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll
@@ -191,7 +191,8 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64:   # %bb.0: # %entry
 ; LA64-NEXT:addi.w $a1, $a1, 0
 ; LA64-NEXT:addi.w $a0, $a0, 0
-; LA64-NEXT:div.w $a0, $a0, $a1
+; LA64-NEXT:div.d $a0, $a0, $a1
+; LA64-NEXT:addi.w $a0, $a0, 0
 ; LA64-NEXT:ret
 ;
 ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32:
@@ -207,11 +208,12 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) {
 ; LA64-TRAP:   # %bb.0: # %entry
 ; LA64-TRAP-NEXT:addi.w $a1, $a1, 0
 ; LA64-TRAP-NEXT:addi.w $a0, $a0, 0
-; LA64-TRAP-NEXT:div.w $a0, $a0, $a1
+; LA64-TRAP-NEXT:div.d $a0, $a0, $a1
 ; LA64-TRAP-NEXT:bnez $a1, .LBB5_2
 ; LA64-TRAP-NEXT:  # %bb.1: # %entry
 ; LA64-TRAP-NEXT:break 7
 ; LA64-TRAP-NEXT:  .LBB5_2: # %entry
+; LA64-TRAP-NEXT:addi.w $a0, $a0, 0
 ; LA64-TRAP-NEXT:ret
 entry:
   %r = sdiv i32 %a, %b
@@ -1151,3 +1153,64 @@ entry:
   %r = urem i64 %a, %b
   ret i64 %r
 }
+
+define signext i32 @pr107414(i32 signext %x) {
+; LA32-LABEL: pr107414:
+; LA32:   # %bb.0: # %entry
+; LA32-NEXT:addi.w $sp, $sp, -16
+; LA32-NEXT:.cfi_def_cfa_offset 16
+; LA32-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:.cfi_offset 1, -4
+; LA32-NEXT:move $a2, $a0
+; LA32-NEXT:srai.w $a3, $a0, 31
+; LA32-NEXT:lu12i.w $a0, -266831
+; LA32-NEXT:ori $a0, $a0, 3337
+; LA32-NEXT:move $a1, $zero
+; LA32-NEXT:bl %plt(__divdi3)
+; LA32-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:addi.w $sp, $sp, 16
+; LA32-NEXT:ret
+;
+; LA64-LABEL: pr107414:
+; LA64:   # %bb.0: # %entry
+; LA64-NEXT:lu12i.w $a1, -266831
+; LA64-NEXT:ori $a1, $a1, 3337
+; LA64-NEXT:lu32i.d $a1, 0
+; LA64-NEXT:div.d $a0, $a1, $a0
+; LA64-NEXT:addi.w $a0, $a0, 0
+; LA64-NEXT:ret
+;
+; LA32-TRAP-LABEL: pr107414:
+; LA32-TRAP:   # %bb.0: # %entry
+; LA32-TRAP-NEXT:addi.w $sp, $sp, -16
+; LA32-TRAP-NEXT:.cfi_def_cfa_offset 16
+; LA32-TRAP-NEXT:st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-TRAP-NEXT:.cfi_offset 1, -4
+; LA32-TRAP-NEXT:move $a2, $a0
+; LA32-TRAP-NEXT:srai.w $a3, $a0, 31
+; LA32-TRAP-NEXT:lu12i.w $a0, -266831
+; LA32-TRAP-NEXT:ori $a0, $a0, 3337
+; LA32-TRAP-NEXT:move $a1, $zero
+; LA32-TRAP-NEXT:bl %plt(__divdi3)
+; LA32-TRAP-NEXT:ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-TRAP-NEXT:addi.w $sp, $sp, 16
+; LA32-TRAP-NEXT:ret
+;
+; LA64-TRAP-LABEL: pr107414:
+; LA64-TRAP:   # %bb.0: # %entry
+; LA64-TRAP-NEXT:lu12i.w $a1, -266831
+; LA64-TRAP-NEXT:ori $a1, $a1, 3337
+; LA64-TRAP-NEXT:lu32i.d $a1, 0
+; LA64-TRAP-NEXT:div.d $a1, $a1, $a0
+; LA64-TRAP-NEXT:bnez $a0, .LBB32_2
+; LA64-TRAP-NEXT:  # %bb.1: # %entry
+; LA64-TRAP-NEXT:break 7

[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/109125
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64] Generalize the instruction size checking in AsmPrinter (PR #110108)

2024-09-30 Thread David Green via llvm-branch-commits


@@ -2546,6 +2510,7 @@ void AArch64AsmPrinter::emitInstruction(const 
MachineInstr *MI) {
 TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
 TLSDescCall.addOperand(Sym);
 EmitToStreamer(*OutStreamer, TLSDescCall);
+--InstsEmitted; // no code emitted

davemgreen wrote:

Will this need #ifndef NDEBUG?

https://github.com/llvm/llvm-project/pull/110108
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Fix the assertion for atomic store with 'ptr' type (PR #109915)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109915

>From b3734d9f93c1f8d908836a966f77c6792242df99 Mon Sep 17 00:00:00 2001
From: Weining Lu 
Date: Mon, 19 Aug 2024 16:51:21 +0800
Subject: [PATCH] [LoongArch] Fix the assertion for atomic store with 'ptr'
 type

(cherry picked from commit 63267ca9016aa334b329aa408716456b4e3799c8)
---
 .../LoongArch/LoongArchISelLowering.cpp   |   5 +-
 .../ir-instruction/load-store-atomic.ll   | 119 ++
 2 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 93edafaff553ba..082b42398c6a71 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -5601,8 +5601,9 @@ bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
 
   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
   // require fences beacuse we can use amswap_db.[w/d].
-  if (isa(I)) {
-unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
+  Type *Ty = I->getOperand(0)->getType();
+  if (isa(I) && Ty->isIntegerTy()) {
+unsigned Size = Ty->getIntegerBitWidth();
 return (Size == 8 || Size == 16);
   }
 
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
index c51fded410e83b..1af2b38d799436 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
@@ -72,6 +72,22 @@ define i64 @load_acquire_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_acquire_ptr(ptr %ptr) {
+; LA32-LABEL: load_acquire_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:ld.w $a0, $a0, 0
+; LA32-NEXT:dbar 20
+; LA32-NEXT:ret
+;
+; LA64-LABEL: load_acquire_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:ld.d $a0, $a0, 0
+; LA64-NEXT:dbar 20
+; LA64-NEXT:ret
+  %val = load atomic ptr, ptr %ptr acquire, align 8
+  ret ptr %val
+}
+
 define i8 @load_unordered_i8(ptr %ptr) {
 ; LA32-LABEL: load_unordered_i8:
 ; LA32:   # %bb.0:
@@ -135,6 +151,20 @@ define i64 @load_unordered_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_unordered_ptr(ptr %ptr) {
+; LA32-LABEL: load_unordered_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:ld.w $a0, $a0, 0
+; LA32-NEXT:ret
+;
+; LA64-LABEL: load_unordered_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:ld.d $a0, $a0, 0
+; LA64-NEXT:ret
+  %val = load atomic ptr, ptr %ptr unordered, align 8
+  ret ptr %val
+}
+
 define i8 @load_monotonic_i8(ptr %ptr) {
 ; LA32-LABEL: load_monotonic_i8:
 ; LA32:   # %bb.0:
@@ -198,6 +228,20 @@ define i64 @load_monotonic_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_monotonic_ptr(ptr %ptr) {
+; LA32-LABEL: load_monotonic_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:ld.w $a0, $a0, 0
+; LA32-NEXT:ret
+;
+; LA64-LABEL: load_monotonic_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:ld.d $a0, $a0, 0
+; LA64-NEXT:ret
+  %val = load atomic ptr, ptr %ptr monotonic, align 8
+  ret ptr %val
+}
+
 define i8 @load_seq_cst_i8(ptr %ptr) {
 ; LA32-LABEL: load_seq_cst_i8:
 ; LA32:   # %bb.0:
@@ -268,6 +312,22 @@ define i64 @load_seq_cst_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_seq_cst_ptr(ptr %ptr) {
+; LA32-LABEL: load_seq_cst_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:ld.w $a0, $a0, 0
+; LA32-NEXT:dbar 16
+; LA32-NEXT:ret
+;
+; LA64-LABEL: load_seq_cst_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:ld.d $a0, $a0, 0
+; LA64-NEXT:dbar 16
+; LA64-NEXT:ret
+  %val = load atomic ptr, ptr %ptr seq_cst, align 8
+  ret ptr %val
+}
+
 define void @store_release_i8(ptr %ptr, i8 signext %v) {
 ; LA32-LABEL: store_release_i8:
 ; LA32:   # %bb.0:
@@ -336,6 +396,21 @@ define void @store_release_i64(ptr %ptr, i64 %v) {
   ret void
 }
 
+define void @store_release_ptr(ptr %ptr, ptr %v) {
+; LA32-LABEL: store_release_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:dbar 18
+; LA32-NEXT:st.w $a1, $a0, 0
+; LA32-NEXT:ret
+;
+; LA64-LABEL: store_release_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:amswap_db.d $zero, $a1, $a0
+; LA64-NEXT:ret
+  store atomic ptr %v, ptr %ptr release, align 8
+  ret void
+}
+
 define void @store_unordered_i8(ptr %ptr, i8 signext %v) {
 ; LA32-LABEL: store_unordered_i8:
 ; LA32:   # %bb.0:
@@ -399,6 +474,20 @@ define void @store_unordered_i64(ptr %ptr, i64 %v) {
   ret void
 }
 
+define void @store_unordered_ptr(ptr %ptr, ptr %v) {
+; LA32-LABEL: store_unordered_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:st.w $a1, $a0, 0
+; LA32-NEXT:ret
+;
+; LA64-LABEL: store_unordered_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:st.d $a1, $a0, 0
+; LA64-NEXT:ret
+  store atomic ptr %v, ptr %ptr unordered, align 8
+  ret void
+}
+
 define void @store_monotonic_i8(ptr %ptr, i8 signext %v) {
 ; LA32-LABEL: store_monotonic

[llvm-branch-commits] [llvm] b3734d9 - [LoongArch] Fix the assertion for atomic store with 'ptr' type

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Weining Lu
Date: 2024-10-01T08:53:44+02:00
New Revision: b3734d9f93c1f8d908836a966f77c6792242df99

URL: 
https://github.com/llvm/llvm-project/commit/b3734d9f93c1f8d908836a966f77c6792242df99
DIFF: 
https://github.com/llvm/llvm-project/commit/b3734d9f93c1f8d908836a966f77c6792242df99.diff

LOG: [LoongArch] Fix the assertion for atomic store with 'ptr' type

(cherry picked from commit 63267ca9016aa334b329aa408716456b4e3799c8)

Added: 


Modified: 
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll

Removed: 




diff  --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 93edafaff553ba..082b42398c6a71 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -5601,8 +5601,9 @@ bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
 
   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
   // require fences beacuse we can use amswap_db.[w/d].
-  if (isa(I)) {
-unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
+  Type *Ty = I->getOperand(0)->getType();
+  if (isa(I) && Ty->isIntegerTy()) {
+unsigned Size = Ty->getIntegerBitWidth();
 return (Size == 8 || Size == 16);
   }
 

diff  --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll 
b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
index c51fded410e83b..1af2b38d799436 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll
@@ -72,6 +72,22 @@ define i64 @load_acquire_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_acquire_ptr(ptr %ptr) {
+; LA32-LABEL: load_acquire_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:ld.w $a0, $a0, 0
+; LA32-NEXT:dbar 20
+; LA32-NEXT:ret
+;
+; LA64-LABEL: load_acquire_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:ld.d $a0, $a0, 0
+; LA64-NEXT:dbar 20
+; LA64-NEXT:ret
+  %val = load atomic ptr, ptr %ptr acquire, align 8
+  ret ptr %val
+}
+
 define i8 @load_unordered_i8(ptr %ptr) {
 ; LA32-LABEL: load_unordered_i8:
 ; LA32:   # %bb.0:
@@ -135,6 +151,20 @@ define i64 @load_unordered_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_unordered_ptr(ptr %ptr) {
+; LA32-LABEL: load_unordered_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:ld.w $a0, $a0, 0
+; LA32-NEXT:ret
+;
+; LA64-LABEL: load_unordered_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:ld.d $a0, $a0, 0
+; LA64-NEXT:ret
+  %val = load atomic ptr, ptr %ptr unordered, align 8
+  ret ptr %val
+}
+
 define i8 @load_monotonic_i8(ptr %ptr) {
 ; LA32-LABEL: load_monotonic_i8:
 ; LA32:   # %bb.0:
@@ -198,6 +228,20 @@ define i64 @load_monotonic_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_monotonic_ptr(ptr %ptr) {
+; LA32-LABEL: load_monotonic_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:ld.w $a0, $a0, 0
+; LA32-NEXT:ret
+;
+; LA64-LABEL: load_monotonic_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:ld.d $a0, $a0, 0
+; LA64-NEXT:ret
+  %val = load atomic ptr, ptr %ptr monotonic, align 8
+  ret ptr %val
+}
+
 define i8 @load_seq_cst_i8(ptr %ptr) {
 ; LA32-LABEL: load_seq_cst_i8:
 ; LA32:   # %bb.0:
@@ -268,6 +312,22 @@ define i64 @load_seq_cst_i64(ptr %ptr) {
   ret i64 %val
 }
 
+define ptr @load_seq_cst_ptr(ptr %ptr) {
+; LA32-LABEL: load_seq_cst_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:ld.w $a0, $a0, 0
+; LA32-NEXT:dbar 16
+; LA32-NEXT:ret
+;
+; LA64-LABEL: load_seq_cst_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:ld.d $a0, $a0, 0
+; LA64-NEXT:dbar 16
+; LA64-NEXT:ret
+  %val = load atomic ptr, ptr %ptr seq_cst, align 8
+  ret ptr %val
+}
+
 define void @store_release_i8(ptr %ptr, i8 signext %v) {
 ; LA32-LABEL: store_release_i8:
 ; LA32:   # %bb.0:
@@ -336,6 +396,21 @@ define void @store_release_i64(ptr %ptr, i64 %v) {
   ret void
 }
 
+define void @store_release_ptr(ptr %ptr, ptr %v) {
+; LA32-LABEL: store_release_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:dbar 18
+; LA32-NEXT:st.w $a1, $a0, 0
+; LA32-NEXT:ret
+;
+; LA64-LABEL: store_release_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:amswap_db.d $zero, $a1, $a0
+; LA64-NEXT:ret
+  store atomic ptr %v, ptr %ptr release, align 8
+  ret void
+}
+
 define void @store_unordered_i8(ptr %ptr, i8 signext %v) {
 ; LA32-LABEL: store_unordered_i8:
 ; LA32:   # %bb.0:
@@ -399,6 +474,20 @@ define void @store_unordered_i64(ptr %ptr, i64 %v) {
   ret void
 }
 
+define void @store_unordered_ptr(ptr %ptr, ptr %v) {
+; LA32-LABEL: store_unordered_ptr:
+; LA32:   # %bb.0:
+; LA32-NEXT:st.w $a1, $a0, 0
+; LA32-NEXT:ret
+;
+; LA64-LABEL: store_unordered_ptr:
+; LA64:   # %bb.0:
+; LA64-NEXT:st.d $a1, $a0, 0
+; LA64-NEXT:ret
+  store atomic pt

[llvm-branch-commits] [clang] release/19.x: [clang-scan-deps] Don't inspect Args[0] as an option (#109050) (PR #109865)

2024-09-30 Thread via llvm-branch-commits
Martin =?utf-8?q?Storsjö?= ,
Martin =?utf-8?q?Storsjö?= ,
Martin =?utf-8?q?Storsjö?= 
Message-ID:
In-Reply-To: 


github-actions[bot] wrote:

@mstorsjo (or anyone else). If you would like to add a note about this fix in 
the release notes (completely optional). Please reply to this comment with a 
one or two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109865
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Fix the assertion for atomic store with 'ptr' type (PR #109915)

2024-09-30 Thread via llvm-branch-commits

github-actions[bot] wrote:

@heiher (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109915
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Fix the assertion for atomic store with 'ptr' type (PR #109915)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/109915
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 962edd3 - AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Petar Avramovic
Date: 2024-10-01T08:56:50+02:00
New Revision: 962edd3f71eebdcd781222cdd97a561979894003

URL: 
https://github.com/llvm/llvm-project/commit/962edd3f71eebdcd781222cdd97a561979894003
DIFF: 
https://github.com/llvm/llvm-project/commit/962edd3f71eebdcd781222cdd97a561979894003.diff

LOG: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base 
(#110256)

Use i32 for offset instead of i16, this way it does not get interpreted
as negative 16 bit offset.

(cherry picked from commit 83fe85115da9dc25fa270d2ea8140113c8d49670)

Added: 


Modified: 
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index b7471bab128509..7b786ee2641721 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode 
*Parent, SDValue Addr,
 0);
   }
 
-  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);
+  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32);
 
   return true;
 }
@@ -1967,7 +1967,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, 
SDValue Addr,
   return false;
 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
   return false;
-Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
+Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
 return true;
   }
 }
@@ -2000,7 +2000,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, 
SDValue Addr,
   if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
 return false;
   SAddr = SelectSAddrFI(CurDAG, SAddr);
-  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
+  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
   return true;
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll 
b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index f040b47428640a..284f1746145225 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -4956,7 +4956,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr 
addrspace(1) %out, ptr addrspa
 ;
 ; GFX12-LABEL: sgpr_base_large_offset:
 ; GFX12:   ; %bb.0: ; %entry
-; GFX12-NEXT:scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-NEXT:scratch_load_b32 v2, off, s0 offset:65512
 ; GFX12-NEXT:s_wait_loadcnt 0x0
 ; GFX12-NEXT:global_store_b32 v[0:1], v2, off
 ; GFX12-NEXT:s_nop 0
@@ -5015,7 +5015,7 @@ define amdgpu_gs void @sgpr_base_large_offset(ptr 
addrspace(1) %out, ptr addrspa
 ;
 ; GFX12-PAL-LABEL: sgpr_base_large_offset:
 ; GFX12-PAL:   ; %bb.0: ; %entry
-; GFX12-PAL-NEXT:scratch_load_b32 v2, off, s0 offset:-24
+; GFX12-PAL-NEXT:scratch_load_b32 v2, off, s0 offset:65512
 ; GFX12-PAL-NEXT:s_wait_loadcnt 0x0
 ; GFX12-PAL-NEXT:global_store_b32 v[0:1], v2, off
 ; GFX12-PAL-NEXT:s_nop 0
@@ -5068,7 +5068,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr 
addrspace(1) %out, ptr a
 ; GFX12:   ; %bb.0: ; %entry
 ; GFX12-NEXT:v_mov_b32_e32 v2, 0x100
 ; GFX12-NEXT:s_and_b32 s0, s0, -4
-; GFX12-NEXT:scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-NEXT:scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-NEXT:s_wait_loadcnt 0x0
 ; GFX12-NEXT:global_store_b32 v[0:1], v2, off
 ; GFX12-NEXT:s_nop 0
@@ -5133,7 +5133,7 @@ define amdgpu_gs void @sgpr_base_large_offset_split(ptr 
addrspace(1) %out, ptr a
 ; GFX12-PAL:   ; %bb.0: ; %entry
 ; GFX12-PAL-NEXT:v_mov_b32_e32 v2, 0x100
 ; GFX12-PAL-NEXT:s_and_b32 s0, s0, -4
-; GFX12-PAL-NEXT:scratch_load_b32 v2, v2, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-PAL-NEXT:scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-PAL-NEXT:s_wait_loadcnt 0x0
 ; GFX12-PAL-NEXT:global_store_b32 v[0:1], v2, off
 ; GFX12-PAL-NEXT:s_nop 0
@@ -5189,7 +5189,7 @@ define amdgpu_gs void 
@sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
 ; GFX12:   ; %bb.0: ; %bb
 ; GFX12-NEXT:v_mov_b32_e32 v1, 15
 ; GFX12-NEXT:s_add_co_i32 s0, s0, s1
-; GFX12-NEXT:scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-NEXT:scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS
 ; GFX12-NEXT:s_wait_storecnt 0x0
 ; GFX12-NEXT:s_endpgm
 ;
@@ -5251,7 +5251,7 @@ define amdgpu_gs void 
@sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr a
 ; GFX12-PAL:   ; %bb.0: ; %bb
 ; GFX12-PAL-NEXT:v_mov_b32_e32 v1, 15
 ; GFX12-PAL-NEXT:s_add_co_i32 s0, s0, s1
-; GFX12-PAL-NEXT:scratch_store_b32 v0, v1, s0 offset:-24 scope:SCOPE_SYS
+; GFX12-PAL-NEXT:scratch_store_b32 v0, v1, s0 offset:65512

[llvm-branch-commits] [llvm] 03d1337 - AMDGPU: Add test for 16 bit unsigned scratch offsets (#110255)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Petar Avramovic
Date: 2024-10-01T08:56:50+02:00
New Revision: 03d133728ae14704b262c55bbb72ecd9d048add5

URL: 
https://github.com/llvm/llvm-project/commit/03d133728ae14704b262c55bbb72ecd9d048add5
DIFF: 
https://github.com/llvm/llvm-project/commit/03d133728ae14704b262c55bbb72ecd9d048add5.diff

LOG: AMDGPU: Add test for 16 bit unsigned scratch offsets (#110255)

A large scratch offset with the highest bit set was selected as a negative
offset: a negative offset has the same binary representation in 16 bits as a
large unsigned offset.

(cherry picked from commit e9d12a6b451bd403d95105aa976a011dc821f126)

Added: 


Modified: 
llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
llvm/test/CodeGen/AMDGPU/flat-scratch.ll

Removed: 




diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index a5e4151bf36958..47ca6f416b02b0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1513,4 +1513,243 @@ bb:
   ret void
 }
 
+define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr 
addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset:
+; GFX9:   ; %bb.0: ; %entry
+; GFX9-NEXT:s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:s_add_u32 s0, s2, 0xffe8
+; GFX9-NEXT:scratch_load_dword v2, off, s0
+; GFX9-NEXT:s_waitcnt vmcnt(0)
+; GFX9-NEXT:global_store_dword v[0:1], v2, off
+; GFX9-NEXT:s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset:
+; GFX10:   ; %bb.0: ; %entry
+; GFX10-NEXT:s_add_u32 s0, s0, s5
+; GFX10-NEXT:s_addc_u32 s1, s1, 0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:s_add_u32 s0, s2, 0xffe8
+; GFX10-NEXT:scratch_load_dword v2, off, s0
+; GFX10-NEXT:s_waitcnt vmcnt(0)
+; GFX10-NEXT:global_store_dword v[0:1], v2, off
+; GFX10-NEXT:s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset:
+; GFX940:   ; %bb.0: ; %entry
+; GFX940-NEXT:s_add_u32 s0, s0, 0xffe8
+; GFX940-NEXT:scratch_load_dword v2, off, s0
+; GFX940-NEXT:s_waitcnt vmcnt(0)
+; GFX940-NEXT:global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_add_u32 s0, s0, 0xffe8
+; GFX11-NEXT:scratch_load_b32 v2, off, s0
+; GFX11-NEXT:s_waitcnt vmcnt(0)
+; GFX11-NEXT:global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:s_nop 0
+; GFX11-NEXT:s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:s_endpgm
+;
+; GFX12-LABEL: sgpr_base_large_offset:
+; GFX12:   ; %bb.0: ; %entry
+; GFX12-NEXT:scratch_load_b32 v2, off, s0 offset:65512
+; GFX12-NEXT:s_wait_loadcnt 0x0
+; GFX12-NEXT:global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:s_nop 0
+; GFX12-NEXT:s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT:s_endpgm
+entry:
+  %large_offset = getelementptr i8, ptr addrspace(5) %sgpr_base, i32 65512
+  %load = load i32, ptr addrspace(5) %large_offset, align 4
+  store i32 %load, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr 
addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset_split:
+; GFX9:   ; %bb.0: ; %entry
+; GFX9-NEXT:s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:s_and_b32 s0, s2, -4
+; GFX9-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX9-NEXT:scratch_load_dword v2, off, s0 glc
+; GFX9-NEXT:s_waitcnt vmcnt(0)
+; GFX9-NEXT:global_store_dword v[0:1], v2, off
+; GFX9-NEXT:s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset_split:
+; GFX10:   ; %bb.0: ; %entry
+; GFX10-NEXT:s_add_u32 s0, s0, s5
+; GFX10-NEXT:s_addc_u32 s1, s1, 0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:s_and_b32 s0, s2, -4
+; GFX10-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX10-NEXT:scratch_load_dword v2, off, s0 glc dlc
+; GFX10-NEXT:s_waitcnt vmcnt(0)
+; GFX10-NEXT:global_store_dword v[0:1], v2, off
+; GFX10-NEXT:s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset_split:
+; GFX940:   ; %bb.0: ; %entry
+; GFX940-NEXT:s_and_b32 s0, s0, -4
+; GFX940-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX940-NEXT:scratch_load_dword v2, off, s0 sc0 sc1
+; GFX940-NEXT:s_waitcnt vmcnt(0)
+; GFX940-NEXT:global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset_split:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_and_b32 s0, s0, -4
+; GFX11-NEXT:s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX11-NEXT:s

[llvm-branch-commits] [llvm] release/19.x: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256) (PR #110470)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/110470

>From 03d133728ae14704b262c55bbb72ecd9d048add5 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 30 Sep 2024 10:39:17 +0200
Subject: [PATCH 1/2] AMDGPU: Add test for 16 bit unsigned scratch offsets
 (#110255)

A large scratch offset with the highest bit set was selected as a negative
offset: a negative offset has the same binary representation in 16 bits as a
large unsigned offset.

(cherry picked from commit e9d12a6b451bd403d95105aa976a011dc821f126)
---
 .../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll | 239 ++
 llvm/test/CodeGen/AMDGPU/flat-scratch.ll  | 444 ++
 2 files changed, 683 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index a5e4151bf36958..47ca6f416b02b0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1513,4 +1513,243 @@ bb:
   ret void
 }
 
+define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr 
addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset:
+; GFX9:   ; %bb.0: ; %entry
+; GFX9-NEXT:s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:s_add_u32 s0, s2, 0xffe8
+; GFX9-NEXT:scratch_load_dword v2, off, s0
+; GFX9-NEXT:s_waitcnt vmcnt(0)
+; GFX9-NEXT:global_store_dword v[0:1], v2, off
+; GFX9-NEXT:s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset:
+; GFX10:   ; %bb.0: ; %entry
+; GFX10-NEXT:s_add_u32 s0, s0, s5
+; GFX10-NEXT:s_addc_u32 s1, s1, 0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:s_add_u32 s0, s2, 0xffe8
+; GFX10-NEXT:scratch_load_dword v2, off, s0
+; GFX10-NEXT:s_waitcnt vmcnt(0)
+; GFX10-NEXT:global_store_dword v[0:1], v2, off
+; GFX10-NEXT:s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset:
+; GFX940:   ; %bb.0: ; %entry
+; GFX940-NEXT:s_add_u32 s0, s0, 0xffe8
+; GFX940-NEXT:scratch_load_dword v2, off, s0
+; GFX940-NEXT:s_waitcnt vmcnt(0)
+; GFX940-NEXT:global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_add_u32 s0, s0, 0xffe8
+; GFX11-NEXT:scratch_load_b32 v2, off, s0
+; GFX11-NEXT:s_waitcnt vmcnt(0)
+; GFX11-NEXT:global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:s_nop 0
+; GFX11-NEXT:s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:s_endpgm
+;
+; GFX12-LABEL: sgpr_base_large_offset:
+; GFX12:   ; %bb.0: ; %entry
+; GFX12-NEXT:scratch_load_b32 v2, off, s0 offset:65512
+; GFX12-NEXT:s_wait_loadcnt 0x0
+; GFX12-NEXT:global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:s_nop 0
+; GFX12-NEXT:s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT:s_endpgm
+entry:
+  %large_offset = getelementptr i8, ptr addrspace(5) %sgpr_base, i32 65512
+  %load = load i32, ptr addrspace(5) %large_offset, align 4
+  store i32 %load, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr 
addrspace(5) inreg %sgpr_base) {
+; GFX9-LABEL: sgpr_base_large_offset_split:
+; GFX9:   ; %bb.0: ; %entry
+; GFX9-NEXT:s_add_u32 flat_scratch_lo, s0, s5
+; GFX9-NEXT:s_addc_u32 flat_scratch_hi, s1, 0
+; GFX9-NEXT:s_and_b32 s0, s2, -4
+; GFX9-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX9-NEXT:scratch_load_dword v2, off, s0 glc
+; GFX9-NEXT:s_waitcnt vmcnt(0)
+; GFX9-NEXT:global_store_dword v[0:1], v2, off
+; GFX9-NEXT:s_endpgm
+;
+; GFX10-LABEL: sgpr_base_large_offset_split:
+; GFX10:   ; %bb.0: ; %entry
+; GFX10-NEXT:s_add_u32 s0, s0, s5
+; GFX10-NEXT:s_addc_u32 s1, s1, 0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0
+; GFX10-NEXT:s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1
+; GFX10-NEXT:s_and_b32 s0, s2, -4
+; GFX10-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX10-NEXT:scratch_load_dword v2, off, s0 glc dlc
+; GFX10-NEXT:s_waitcnt vmcnt(0)
+; GFX10-NEXT:global_store_dword v[0:1], v2, off
+; GFX10-NEXT:s_endpgm
+;
+; GFX940-LABEL: sgpr_base_large_offset_split:
+; GFX940:   ; %bb.0: ; %entry
+; GFX940-NEXT:s_and_b32 s0, s0, -4
+; GFX940-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX940-NEXT:scratch_load_dword v2, off, s0 sc0 sc1
+; GFX940-NEXT:s_waitcnt vmcnt(0)
+; GFX940-NEXT:global_store_dword v[0:1], v2, off sc0 sc1
+; GFX940-NEXT:s_endpgm
+;
+; GFX11-LABEL: sgpr_base_large_offset_split:
+; GFX11:   ; %bb.0: ; %entry
+; GFX11-NEXT:s_and_b32 s0, s0, -4
+; GFX11-NEXT:s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:s_add_u32 s0, s0, 0x100ffe8
+; GFX11-NEXT:scratch_load_b32 v2, off, s0 glc dlc
+; GFX11-NEXT:s_waitcnt vmcnt(0)
+; GFX11-NEXT:global_store_b32 v[0:1], v2, 

[llvm-branch-commits] [libcxx] [release/19.x][libc++] Fix AppleClang version number when checking for __builtin_verbose_trap support (PR #110263)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/110263

>From 53010fcf66b5a84153bce6b7e866edb596e59cf4 Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Fri, 27 Sep 2024 08:53:02 -0400
Subject: [PATCH] [libc++] Fix AppleClang version number when checking for
 __builtin_verbose_trap support (#110161)

We should have been checking against 1700, not 17000, which was a typo.

(cherry picked from commit 1eba87904b0cbaaee82cfdb835528b85d99320ef)
---
 libcxx/vendor/llvm/default_assertion_handler.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libcxx/vendor/llvm/default_assertion_handler.in 
b/libcxx/vendor/llvm/default_assertion_handler.in
index 3b6d6b2cca53c2..e12daff37f 100644
--- a/libcxx/vendor/llvm/default_assertion_handler.in
+++ b/libcxx/vendor/llvm/default_assertion_handler.in
@@ -26,7 +26,8 @@
 #  if __has_builtin(__builtin_verbose_trap)
 // AppleClang shipped a slightly different version of __builtin_verbose_trap 
from the upstream
 // version before upstream Clang actually got the builtin.
-#if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 17000
+// TODO: Remove once AppleClang supports the two-arguments version of the 
builtin.
+#if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1700
 #  define _LIBCPP_ASSERTION_HANDLER(message) 
__builtin_verbose_trap(message)
 #else
 #  define _LIBCPP_ASSERTION_HANDLER(message) 
__builtin_verbose_trap("libc++", message)

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [release/19.x][libc++] Fix AppleClang version number when checking for __builtin_verbose_trap support (PR #110263)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/110263
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] 53010fc - [libc++] Fix AppleClang version number when checking for __builtin_verbose_trap support (#110161)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Louis Dionne
Date: 2024-10-01T08:56:18+02:00
New Revision: 53010fcf66b5a84153bce6b7e866edb596e59cf4

URL: 
https://github.com/llvm/llvm-project/commit/53010fcf66b5a84153bce6b7e866edb596e59cf4
DIFF: 
https://github.com/llvm/llvm-project/commit/53010fcf66b5a84153bce6b7e866edb596e59cf4.diff

LOG: [libc++] Fix AppleClang version number when checking for 
__builtin_verbose_trap support (#110161)

We should have been checking against 1700, not 17000, which was a typo.

(cherry picked from commit 1eba87904b0cbaaee82cfdb835528b85d99320ef)

Added: 


Modified: 
libcxx/vendor/llvm/default_assertion_handler.in

Removed: 




diff  --git a/libcxx/vendor/llvm/default_assertion_handler.in 
b/libcxx/vendor/llvm/default_assertion_handler.in
index 3b6d6b2cca53c2..e12daff37f 100644
--- a/libcxx/vendor/llvm/default_assertion_handler.in
+++ b/libcxx/vendor/llvm/default_assertion_handler.in
@@ -26,7 +26,8 @@
 #  if __has_builtin(__builtin_verbose_trap)
 // AppleClang shipped a slightly 
diff erent version of __builtin_verbose_trap from the upstream
 // version before upstream Clang actually got the builtin.
-#if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 17000
+// TODO: Remove once AppleClang supports the two-arguments version of the 
builtin.
+#if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1700
 #  define _LIBCPP_ASSERTION_HANDLER(message) 
__builtin_verbose_trap(message)
 #else
 #  define _LIBCPP_ASSERTION_HANDLER(message) 
__builtin_verbose_trap("libc++", message)



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: AMDGPU: Fix inst-selection of large scratch offsets with sgpr base (#110256) (PR #110470)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/110470
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/109093
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) (PR #109093)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Sounds to me like we don't have to accept this patch into the 19.1 release. 
Please re-open and argue if you don't agree with that assessment.

https://github.com/llvm/llvm-project/pull/109093
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [release/19.x][libc++] Fix AppleClang version number when checking for __builtin_verbose_trap support (PR #110263)

2024-09-30 Thread via llvm-branch-commits

github-actions[bot] wrote:

@ldionne (or anyone else). If you would like to add a note about this fix in 
the release notes (completely optional). Please reply to this comment with a 
one or two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/110263
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Backport "[Clang][CodeGen] Fix type for atomic float incdec operators (#107075)" (PR #107184)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/107184

>From 149bfdd61c961edbf49c2ea7fadf9d3c1a79a55e Mon Sep 17 00:00:00 2001
From: Yingwei Zheng 
Date: Wed, 4 Sep 2024 12:19:46 +0800
Subject: [PATCH] [Clang][CodeGen] Fix type for atomic float incdec operators
 (#107075)

`llvm::ConstantFP::get(llvm::LLVMContext&, APFloat(float))` always
returns a f32 constant.
Fix https://github.com/llvm/llvm-project/issues/107054.
---
 clang/lib/CodeGen/CGExprScalar.cpp|  26 +-
 clang/test/CodeGen/X86/x86-atomic-double.c|  88 +++---
 .../test/CodeGen/X86/x86-atomic-long_double.c | 293 ++
 3 files changed, 300 insertions(+), 107 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index a17d68424bbce5..6e212e74676e8d 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2833,18 +2833,22 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const 
UnaryOperator *E, LValue LV,
   
llvm::AtomicOrdering::SequentiallyConsistent);
   return isPre ? Builder.CreateBinOp(op, old, amt) : old;
 }
-// Special case for atomic increment/decrement on floats
+// Special case for atomic increment/decrement on floats.
+// Bail out non-power-of-2-sized floating point types (e.g., x86_fp80).
 if (type->isFloatingType()) {
-  llvm::AtomicRMWInst::BinOp aop =
-  isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
-  llvm::Instruction::BinaryOps op =
-  isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
-  llvm::Value *amt = llvm::ConstantFP::get(
-  VMContext, llvm::APFloat(static_cast(1.0)));
-  llvm::Value *old =
-  Builder.CreateAtomicRMW(aop, LV.getAddress(), amt,
-  
llvm::AtomicOrdering::SequentiallyConsistent);
-  return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+  llvm::Type *Ty = ConvertType(type);
+  if (llvm::has_single_bit(Ty->getScalarSizeInBits())) {
+llvm::AtomicRMWInst::BinOp aop =
+isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
+llvm::Instruction::BinaryOps op =
+isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
+llvm::Value *amt = llvm::ConstantFP::get(Ty, 1.0);
+llvm::AtomicRMWInst *old = Builder.CreateAtomicRMW(
+aop, LV.getAddress(), amt,
+llvm::AtomicOrdering::SequentiallyConsistent);
+
+return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+  }
 }
 value = EmitLoadOfLValue(LV, E->getExprLoc());
 input = value;
diff --git a/clang/test/CodeGen/X86/x86-atomic-double.c 
b/clang/test/CodeGen/X86/x86-atomic-double.c
index 2354c89cc2b170..09c8f70c3db854 100644
--- a/clang/test/CodeGen/X86/x86-atomic-double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-double.c
@@ -6,20 +6,14 @@
 // X64-LABEL: define dso_local double @test_double_post_inc(
 // X64-SAME: ) #[[ATTR0:[0-9]+]] {
 // X64-NEXT:  entry:
-// X64-NEXT:[[RETVAL:%.*]] = alloca double, align 8
-// X64-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, 
float 1.00e+00 seq_cst, align 8
-// X64-NEXT:store float [[TMP0]], ptr [[RETVAL]], align 8
-// X64-NEXT:[[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8
-// X64-NEXT:ret double [[TMP1]]
+// X64-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, 
double 1.00e+00 seq_cst, align 8
+// X64-NEXT:ret double [[TMP0]]
 //
 // X86-LABEL: define dso_local double @test_double_post_inc(
 // X86-SAME: ) #[[ATTR0:[0-9]+]] {
 // X86-NEXT:  entry:
-// X86-NEXT:[[RETVAL:%.*]] = alloca double, align 4
-// X86-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, 
float 1.00e+00 seq_cst, align 8
-// X86-NEXT:store float [[TMP0]], ptr [[RETVAL]], align 4
-// X86-NEXT:[[TMP1:%.*]] = load double, ptr [[RETVAL]], align 4
-// X86-NEXT:ret double [[TMP1]]
+// X86-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, 
double 1.00e+00 seq_cst, align 8
+// X86-NEXT:ret double [[TMP0]]
 //
 double test_double_post_inc()
 {
@@ -30,20 +24,14 @@ double test_double_post_inc()
 // X64-LABEL: define dso_local double @test_double_post_dc(
 // X64-SAME: ) #[[ATTR0]] {
 // X64-NEXT:  entry:
-// X64-NEXT:[[RETVAL:%.*]] = alloca double, align 8
-// X64-NEXT:[[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, 
float 1.00e+00 seq_cst, align 8
-// X64-NEXT:store float [[TMP0]], ptr [[RETVAL]], align 8
-// X64-NEXT:[[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8
-// X64-NEXT:ret double [[TMP1]]
+// X64-NEXT:[[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, 
double 1.00e+00 seq_cst, align 8
+// X64-NEXT:ret double [[TMP0]]
 //
 // X86-LABEL: define dso_local double @test_double_post_dc(
 // X86-SAME: ) #[[ATTR0]] {
 // X86-NEXT:  entry:
-// X86-NEXT:[[RETVAL:%.*]]

[llvm-branch-commits] [libcxx] [release/19.x] Cherry-picks to fix the zdump spurious failures in CI (PR #110259)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/110259

>From be879942edbb27b4a43eb6b5f4162153972fc41a Mon Sep 17 00:00:00 2001
From: David Spickett 
Date: Thu, 15 Aug 2024 08:14:13 +
Subject: [PATCH 1/2] [lldb][test] Mark sys_info zdump test unsupported on 32
 bit Arm Linux

Until https://github.com/llvm/llvm-project/pull/103056 lands
or another more appropriate check can be found.

This test fails on Ubuntu Focal where zdump is built with 32 bit time_t
but passes on Ubuntu Jammy where zdump is built with 64 bit time_t.

Marking it unsupported means Linaro can upgrade its bots to Ubuntu
Jammy without getting an unexpected pass.

(cherry picked from commit 6f6422f4a2b8647a59936c131e50a79906d89510)
---
 .../time.zone.members/sys_info.zdump.pass.cpp   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
 
b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
index 207f8e4df45413..2b97d9a5bc745b 100644
--- 
a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
+++ 
b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
@@ -14,7 +14,7 @@
 // XFAIL: availability-tzdb-missing
 
 // TODO TZDB Investigate
-// XFAIL: target={{armv(7|8)l-linux-gnueabihf}}
+// UNSUPPORTED: target={{armv(7|8)l-linux-gnueabihf}}
 
 #include 
 #include 

>From b43662ee7cdcf96d9eeda3c5d6707c6fb08ed3dc Mon Sep 17 00:00:00 2001
From: David Spickett 
Date: Fri, 13 Sep 2024 09:14:53 +0100
Subject: [PATCH 2/2] [libcxx][test] Use smaller time range for 32 bit time_t
 (#104762)

This fixes the test on Arm 32 bit Ubuntu Jammy where time_t is 32 bit.

(cherry picked from commit cdd608b8f0ce090b3568238387df368751bdbb5d)
---
 .../time.zone.members/sys_info.zdump.pass.cpp| 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git 
a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
 
b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
index 2b97d9a5bc745b..b474fe50083b1d 100644
--- 
a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
+++ 
b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
@@ -13,9 +13,6 @@
 // XFAIL: libcpp-has-no-experimental-tzdb
 // XFAIL: availability-tzdb-missing
 
-// TODO TZDB Investigate
-// UNSUPPORTED: target={{armv(7|8)l-linux-gnueabihf}}
-
 #include 
 #include 
 #include 
@@ -28,7 +25,7 @@
 // The year range to validate. The dates used in practice are expected to be
 // inside the tested range.
 constexpr std::chrono::year first{1800};
-constexpr std::chrono::year last{2100};
+constexpr std::chrono::year last{sizeof(time_t) == 8 ? 2100 : 2037};
 
 // A custom sys_info class that also stores the name of the time zone.
 // Its formatter matches the output of zdump.

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [release/19.x][libc++] Disable the clang-tidy checks to get CI back (#109989) (PR #110162)

2024-09-30 Thread via llvm-branch-commits

github-actions[bot] wrote:

@ldionne (or anyone else). If you would like to add a note about this fix in 
the release notes (completely optional). Please reply to this comment with a 
one or two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/110162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [release/19.x][libc++] Disable the clang-tidy checks to get CI back (#109989) (PR #110162)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/110162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] Backport "[Clang][CodeGen] Fix type for atomic float incdec operators (#107075)" (PR #107184)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/107184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang-scan-deps] Don't inspect Args[0] as an option (#109050) (PR #109865)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits
Martin =?utf-8?q?Storsjö?= ,
Martin =?utf-8?q?Storsjö?= ,
Martin =?utf-8?q?Storsjö?= 
Message-ID:
In-Reply-To: 


https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109865

>From 7d1f2065d68795b6fc6de4953f9f0ac719cf1c65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= 
Date: Thu, 12 Sep 2024 22:20:14 +0300
Subject: [PATCH 1/4] [clang-scan-deps] Infer the target from the executable
 name (#108189)

This allows clang-scan-deps to work correctly when using cross compilers
with names like -clang.

(cherry picked from commit 87e1104cf0e2de0d04bee2944893fa7897277b2f)
---
 clang/test/ClangScanDeps/implicit-target.c| 31 +++
 clang/tools/clang-scan-deps/ClangScanDeps.cpp |  5 +++
 2 files changed, 36 insertions(+)
 create mode 100644 clang/test/ClangScanDeps/implicit-target.c

diff --git a/clang/test/ClangScanDeps/implicit-target.c 
b/clang/test/ClangScanDeps/implicit-target.c
new file mode 100644
index 00..cf757f937331a6
--- /dev/null
+++ b/clang/test/ClangScanDeps/implicit-target.c
@@ -0,0 +1,31 @@
+// Check that we can detect an implicit target when clang is invoked as
+// clang. Using an implicit triple requires that the target actually
+// is available, too.
+// REQUIRES: x86-registered-target
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+// Check that we can deduce this both when using a compilation database, and 
when using
+// a literal command line.
+
+// RUN: clang-scan-deps -format experimental-full -compilation-database 
%t/cdb.json | FileCheck %s
+
+// RUN: clang-scan-deps -format experimental-full -- x86_64-w64-mingw32-clang 
%t/source.c -o %t/source.o | FileCheck %s
+
+// CHECK: "-triple",
+// CHECK-NEXT: "x86_64-w64-windows-gnu",
+
+
+//--- cdb.json.in
+[
+  {
+"directory": "DIR"
+"command": "x86_64-w64-mingw32-clang -c DIR/source.c -o DIR/source.o"
+"file": "DIR/source.c"
+  },
+]
+
+//--- source.c
+void func(void) {}
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp 
b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index a8f6150dd3493d..cd6dd2620152a6 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -15,6 +15,7 @@
 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
+#include "clang/Tooling/Tooling.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/CommandLine.h"
@@ -24,6 +25,7 @@
 #include "llvm/Support/LLVMDriver.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/Timer.h"
@@ -795,6 +797,7 @@ getCompilationDatabase(int argc, char **argv, std::string 
&ErrorMessage) {
 }
 
 int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
+  llvm::InitializeAllTargetInfos();
   std::string ErrorMessage;
   std::unique_ptr Compilations =
   getCompilationDatabase(argc, argv, ErrorMessage);
@@ -810,6 +813,8 @@ int clang_scan_deps_main(int argc, char **argv, const 
llvm::ToolContext &) {
   Compilations = expandResponseFiles(std::move(Compilations),
  llvm::vfs::getRealFileSystem());
 
+  Compilations = inferTargetAndDriverMode(std::move(Compilations));
+
   // The command options are rewritten to run Clang in preprocessor only mode.
   auto AdjustingCompilations =
   std::make_unique(

>From a0fc8a2b2b85a70c8c523ff2d1fe4ef2e86cda7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= 
Date: Thu, 12 Sep 2024 23:11:27 +0300
Subject: [PATCH 2/4] [clang-scan-deps] Fix builds with BUILD_SHARED_LIBS=ON

This fixes building in this configuration after
87e1104cf0e2de0d04bee2944893fa7897277b2f.

(cherry picked from commit aa3465793a250faa5426ac626989375465256658)
---
 clang/tools/clang-scan-deps/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/tools/clang-scan-deps/CMakeLists.txt 
b/clang/tools/clang-scan-deps/CMakeLists.txt
index f0be6a546ff882..10bc0ff23c5482 100644
--- a/clang/tools/clang-scan-deps/CMakeLists.txt
+++ b/clang/tools/clang-scan-deps/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
   Core
   Option
   Support

>From 2b6c23303f7c3f6397003cdac4be6e9e6b78d957 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= 
Date: Fri, 13 Sep 2024 23:18:10 +0300
Subject: [PATCH 3/4] [clang-scan-deps] Infer the tool locations from PATH
 (#108539)

This allows the clang driver to know which tool is meant to be executed,
which allows the clang driver to load the right clang config files, and
allows clang to find colocated sysroots.

This makes sure that doing `clang-scan-deps -- <tool> ...` looks up
things in the same way as if one 

[llvm-branch-commits] [clang] 7d1f206 - [clang-scan-deps] Infer the target from the executable name (#108189)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Martin Storsjö
Date: 2024-10-01T08:53:03+02:00
New Revision: 7d1f2065d68795b6fc6de4953f9f0ac719cf1c65

URL: 
https://github.com/llvm/llvm-project/commit/7d1f2065d68795b6fc6de4953f9f0ac719cf1c65
DIFF: 
https://github.com/llvm/llvm-project/commit/7d1f2065d68795b6fc6de4953f9f0ac719cf1c65.diff

LOG: [clang-scan-deps] Infer the target from the executable name (#108189)

This allows clang-scan-deps to work correctly when using cross compilers
with names like -clang.

(cherry picked from commit 87e1104cf0e2de0d04bee2944893fa7897277b2f)

Added: 
clang/test/ClangScanDeps/implicit-target.c

Modified: 
clang/tools/clang-scan-deps/ClangScanDeps.cpp

Removed: 




diff  --git a/clang/test/ClangScanDeps/implicit-target.c 
b/clang/test/ClangScanDeps/implicit-target.c
new file mode 100644
index 00..cf757f937331a6
--- /dev/null
+++ b/clang/test/ClangScanDeps/implicit-target.c
@@ -0,0 +1,31 @@
+// Check that we can detect an implicit target when clang is invoked as
+// clang. Using an implicit triple requires that the target actually
+// is available, too.
+// REQUIRES: x86-registered-target
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+// Check that we can deduce this both when using a compilation database, and 
when using
+// a literal command line.
+
+// RUN: clang-scan-deps -format experimental-full -compilation-database 
%t/cdb.json | FileCheck %s
+
+// RUN: clang-scan-deps -format experimental-full -- x86_64-w64-mingw32-clang 
%t/source.c -o %t/source.o | FileCheck %s
+
+// CHECK: "-triple",
+// CHECK-NEXT: "x86_64-w64-windows-gnu",
+
+
+//--- cdb.json.in
+[
+  {
+    "directory": "DIR",
+    "command": "x86_64-w64-mingw32-clang -c DIR/source.c -o DIR/source.o",
+    "file": "DIR/source.c"
+  },
+]
+
+//--- source.c
+void func(void) {}

diff  --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp 
b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index a8f6150dd3493d..cd6dd2620152a6 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -15,6 +15,7 @@
 #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
+#include "clang/Tooling/Tooling.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/CommandLine.h"
@@ -24,6 +25,7 @@
 #include "llvm/Support/LLVMDriver.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/Timer.h"
@@ -795,6 +797,7 @@ getCompilationDatabase(int argc, char **argv, std::string 
&ErrorMessage) {
 }
 
 int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
+  llvm::InitializeAllTargetInfos();
   std::string ErrorMessage;
   std::unique_ptr<tooling::CompilationDatabase> Compilations =
       getCompilationDatabase(argc, argv, ErrorMessage);
@@ -810,6 +813,8 @@ int clang_scan_deps_main(int argc, char **argv, const 
llvm::ToolContext &) {
   Compilations = expandResponseFiles(std::move(Compilations),
  llvm::vfs::getRealFileSystem());
 
+  Compilations = inferTargetAndDriverMode(std::move(Compilations));
+
   // The command options are rewritten to run Clang in preprocessor only mode.
   auto AdjustingCompilations =
       std::make_unique<tooling::ArgumentsAdjustingCompilations>(



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] a0fc8a2 - [clang-scan-deps] Fix builds with BUILD_SHARED_LIBS=ON

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Martin Storsjö
Date: 2024-10-01T08:53:03+02:00
New Revision: a0fc8a2b2b85a70c8c523ff2d1fe4ef2e86cda7f

URL: 
https://github.com/llvm/llvm-project/commit/a0fc8a2b2b85a70c8c523ff2d1fe4ef2e86cda7f
DIFF: 
https://github.com/llvm/llvm-project/commit/a0fc8a2b2b85a70c8c523ff2d1fe4ef2e86cda7f.diff

LOG: [clang-scan-deps] Fix builds with BUILD_SHARED_LIBS=ON

This fixes building in this configuration after
87e1104cf0e2de0d04bee2944893fa7897277b2f.

(cherry picked from commit aa3465793a250faa5426ac626989375465256658)

Added: 


Modified: 
clang/tools/clang-scan-deps/CMakeLists.txt

Removed: 




diff  --git a/clang/tools/clang-scan-deps/CMakeLists.txt 
b/clang/tools/clang-scan-deps/CMakeLists.txt
index f0be6a546ff882..10bc0ff23c5482 100644
--- a/clang/tools/clang-scan-deps/CMakeLists.txt
+++ b/clang/tools/clang-scan-deps/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
   Core
   Option
   Support



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 2b6c233 - [clang-scan-deps] Infer the tool locations from PATH (#108539)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Martin Storsjö
Date: 2024-10-01T08:53:03+02:00
New Revision: 2b6c23303f7c3f6397003cdac4be6e9e6b78d957

URL: 
https://github.com/llvm/llvm-project/commit/2b6c23303f7c3f6397003cdac4be6e9e6b78d957
DIFF: 
https://github.com/llvm/llvm-project/commit/2b6c23303f7c3f6397003cdac4be6e9e6b78d957.diff

LOG: [clang-scan-deps] Infer the tool locations from PATH (#108539)

This allows the clang driver to know which tool is meant to be executed,
which allows the clang driver to load the right clang config files, and
allows clang to find colocated sysroots.

This makes sure that doing `clang-scan-deps -- <tool> ...` looks up
things in the same way as if one just would execute `<tool> ...`, when
`<tool>` isn't an absolute or relative path.

(cherry picked from commit a26ec542371652e1d774696e90016fd5b0b1c191)

Added: 
clang/lib/Tooling/LocateToolCompilationDatabase.cpp
clang/test/ClangScanDeps/resolve-executable-path.c

Modified: 
clang/include/clang/Tooling/CompilationDatabase.h
clang/lib/Tooling/CMakeLists.txt
clang/test/ClangScanDeps/modules-extern-submodule.c
clang/test/ClangScanDeps/modules-full-output-tu-order.c
clang/test/ClangScanDeps/modules-has-include-umbrella-header.c
clang/test/ClangScanDeps/modules-header-sharing.m
clang/test/ClangScanDeps/modules-implementation-module-map.c
clang/test/ClangScanDeps/modules-implementation-private.m
clang/test/ClangScanDeps/modules-priv-fw-from-pub.m
clang/tools/clang-scan-deps/ClangScanDeps.cpp

Removed: 




diff  --git a/clang/include/clang/Tooling/CompilationDatabase.h 
b/clang/include/clang/Tooling/CompilationDatabase.h
index fee584acb48623..36fe0812ebe974 100644
--- a/clang/include/clang/Tooling/CompilationDatabase.h
+++ b/clang/include/clang/Tooling/CompilationDatabase.h
@@ -234,6 +234,12 @@ std::unique_ptr<CompilationDatabase>
 std::unique_ptr<CompilationDatabase>
 inferTargetAndDriverMode(std::unique_ptr<CompilationDatabase> Base);
 
+/// Returns a wrapped CompilationDatabase that will transform argv[0] to an
+/// absolute path, if it currently is a plain tool name, looking it up in
+/// PATH.
+std::unique_ptr<CompilationDatabase>
+inferToolLocation(std::unique_ptr<CompilationDatabase> Base);
+
 /// Returns a wrapped CompilationDatabase that will expand all rsp(response)
 /// files on commandline returned by underlying database.
 std::unique_ptr<CompilationDatabase>

diff  --git a/clang/lib/Tooling/CMakeLists.txt 
b/clang/lib/Tooling/CMakeLists.txt
index 93a9e707a134cf..fc1f1f9f9d367e 100644
--- a/clang/lib/Tooling/CMakeLists.txt
+++ b/clang/lib/Tooling/CMakeLists.txt
@@ -25,6 +25,7 @@ add_clang_library(clangTooling
   GuessTargetAndModeCompilationDatabase.cpp
   InterpolatingCompilationDatabase.cpp
   JSONCompilationDatabase.cpp
+  LocateToolCompilationDatabase.cpp
   Refactoring.cpp
   RefactoringCallbacks.cpp
   StandaloneExecution.cpp

diff  --git a/clang/lib/Tooling/LocateToolCompilationDatabase.cpp 
b/clang/lib/Tooling/LocateToolCompilationDatabase.cpp
new file mode 100644
index 00..033f69f3760c6d
--- /dev/null
+++ b/clang/lib/Tooling/LocateToolCompilationDatabase.cpp
@@ -0,0 +1,71 @@
+//===- GuessTargetAndModeCompilationDatabase.cpp 
--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#include "clang/Tooling/CompilationDatabase.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include <memory>
+
+namespace clang {
+namespace tooling {
+
+namespace {
+class LocationAdderDatabase : public CompilationDatabase {
+public:
+  LocationAdderDatabase(std::unique_ptr<CompilationDatabase> Base)
+      : Base(std::move(Base)) {
+    assert(this->Base != nullptr);
+  }
+
+  std::vector<std::string> getAllFiles() const override {
+    return Base->getAllFiles();
+  }
+
+  std::vector<CompileCommand> getAllCompileCommands() const override {
+    return addLocation(Base->getAllCompileCommands());
+  }
+
+  std::vector<CompileCommand>
+  getCompileCommands(StringRef FilePath) const override {
+    return addLocation(Base->getCompileCommands(FilePath));
+  }
+
+private:
+  std::vector<CompileCommand>
+  addLocation(std::vector<CompileCommand> Cmds) const {
+for (auto &Cmd : Cmds) {
+  if (Cmd.CommandLine.empty())
+continue;
+  std::string &Driver = Cmd.CommandLine.front();
+  // If the driver name already is absolute, we don't need to do anything.
+  if (llvm::sys::path::is_absolute(Driver))
+continue;
+  // If the name is a relative path, like bin/clang, we assume it's
+  // possible to resolve it and don't do anything about it either.
+  if (llvm::any_of(Driver,
+   [](char C) { return llvm::sys::path::is_separator(C); 
}))
+continue;
+  auto Absolute = llvm::sys::findProgramByName(Driver);
+  // If we found it in path, update the entry in Cmd.CommandLine
+ 

[llvm-branch-commits] [llvm] release/19.x: [LoopPeel] Fix LCSSA phi node invalidation (PR #109624)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109624

>From aaa7027716ad347cda75865e99a2ff654bed6bf1 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Fri, 20 Sep 2024 16:57:46 +0200
Subject: [PATCH] [LoopPeel] Fix LCSSA phi node invalidation

In the test case, the BECount of the second loop uses %load,
but we only have an LCSSA phi node for %add, so that is what
gets invalidated. Use the forgetLcssaPhiWithNewPredecessor()
API instead, which will invalidate the roots of the expression
instead.

Fixes https://github.com/llvm/llvm-project/issues/109333.

(cherry picked from commit 5bcc82d43388bb0daa122d5fe7ecda5eca27fc16)
---
 llvm/lib/Transforms/Utils/LoopPeel.cpp  |   2 +-
 llvm/test/Transforms/LoopUnroll/pr109333.ll | 104 
 2 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/pr109333.ll

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp 
b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 5d7c0d947facc4..760f1619e030c3 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -859,7 +859,7 @@ static void cloneLoopBlocks(
   if (LatchInst && L->contains(LatchInst))
 LatchVal = VMap[LatchVal];
   PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
-  SE.forgetValue(&PHI);
+  SE.forgetLcssaPhiWithNewPredecessor(L, &PHI);
 }
 
   // LastValueMap is updated with the values for the current loop
diff --git a/llvm/test/Transforms/LoopUnroll/pr109333.ll 
b/llvm/test/Transforms/LoopUnroll/pr109333.ll
new file mode 100644
index 00..f7ac911a78207a
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/pr109333.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -S -passes="print,loop-unroll" -unroll-runtime < 
%s 2>/dev/null | FileCheck %s
+
+; Make sure we use %add.lcssa rather than %load when expanding the
+; backedge taken count.
+
+define void @test(i1 %c, ptr %p) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:br label %[[LOOP_1_PEEL_BEGIN:.*]]
+; CHECK:   [[LOOP_1_PEEL_BEGIN]]:
+; CHECK-NEXT:br label %[[LOOP_1_PEEL:.*]]
+; CHECK:   [[LOOP_1_PEEL]]:
+; CHECK-NEXT:[[LOAD_PEEL:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:[[ADD_PEEL:%.*]] = add i64 [[LOAD_PEEL]], 1
+; CHECK-NEXT:br i1 [[C]], label %[[IF:.*]], label %[[LOOP_1_PEEL_NEXT:.*]]
+; CHECK:   [[LOOP_1_PEEL_NEXT]]:
+; CHECK-NEXT:br label %[[LOOP_1_PEEL_NEXT1:.*]]
+; CHECK:   [[LOOP_1_PEEL_NEXT1]]:
+; CHECK-NEXT:br label %[[ENTRY_PEEL_NEWPH:.*]]
+; CHECK:   [[ENTRY_PEEL_NEWPH]]:
+; CHECK-NEXT:br label %[[LOOP_1:.*]]
+; CHECK:   [[LOOP_1]]:
+; CHECK-NEXT:[[LOAD:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:[[ADD:%.*]] = add i64 [[LOAD]], 1
+; CHECK-NEXT:br i1 [[C]], label %[[IF_LOOPEXIT:.*]], label %[[LOOP_1]], 
!llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:   [[IF_LOOPEXIT]]:
+; CHECK-NEXT:[[ADD_LCSSA_PH:%.*]] = phi i64 [ [[ADD]], %[[LOOP_1]] ]
+; CHECK-NEXT:br label %[[IF]]
+; CHECK:   [[IF]]:
+; CHECK-NEXT:[[ADD_LCSSA:%.*]] = phi i64 [ [[ADD_PEEL]], %[[LOOP_1_PEEL]] 
], [ [[ADD_LCSSA_PH]], %[[IF_LOOPEXIT]] ]
+; CHECK-NEXT:[[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[ADD_LCSSA]]
+; CHECK-NEXT:[[TMP0:%.*]] = shl i64 [[ADD_LCSSA]], 3
+; CHECK-NEXT:[[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; CHECK-NEXT:[[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:[[XTRAITER:%.*]] = and i64 [[TMP2]], 7
+; CHECK-NEXT:[[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:br i1 [[LCMP_MOD]], label %[[LOOP_2_PROL_PREHEADER:.*]], 
label %[[LOOP_2_PROL_LOOPEXIT:.*]]
+; CHECK:   [[LOOP_2_PROL_PREHEADER]]:
+; CHECK-NEXT:br label %[[LOOP_2_PROL:.*]]
+; CHECK:   [[LOOP_2_PROL]]:
+; CHECK-NEXT:[[IV_PROL:%.*]] = phi ptr [ [[P]], %[[LOOP_2_PROL_PREHEADER]] 
], [ [[IV_NEXT_PROL:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:[[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_2_PROL_PREHEADER]] 
], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:[[IV_NEXT_PROL]] = getelementptr i8, ptr [[IV_PROL]], i64 8
+; CHECK-NEXT:[[ICMP_PROL:%.*]] = icmp eq ptr [[IV_PROL]], [[GEP]]
+; CHECK-NEXT:[[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:[[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], 
[[XTRAITER]]
+; CHECK-NEXT:br i1 [[PROL_ITER_CMP]], label %[[LOOP_2_PROL]], label 
%[[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:   [[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT:[[IV_UNR_PH:%.*]] = phi ptr [ [[IV_NEXT_PROL]], 
%[[LOOP_2_PROL]] ]
+; CHECK-NEXT:br label %[[LOOP_2_PROL_LOOPEXIT]]
+; CHECK:   [[LOOP_2_PROL_LOOPEXIT]]:
+; CHECK-NEXT:[[IV_UNR:%.*]] = phi ptr [ [[P]], %[[IF]] ], [ [[IV_UNR_PH]], 
%[[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA]] ]
+;

[llvm-branch-commits] [llvm] aaa7027 - [LoopPeel] Fix LCSSA phi node invalidation

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Nikita Popov
Date: 2024-10-01T08:52:08+02:00
New Revision: aaa7027716ad347cda75865e99a2ff654bed6bf1

URL: 
https://github.com/llvm/llvm-project/commit/aaa7027716ad347cda75865e99a2ff654bed6bf1
DIFF: 
https://github.com/llvm/llvm-project/commit/aaa7027716ad347cda75865e99a2ff654bed6bf1.diff

LOG: [LoopPeel] Fix LCSSA phi node invalidation

In the test case, the BECount of the second loop uses %load,
but we only have an LCSSA phi node for %add, so that is what
gets invalidated. Use the forgetLcssaPhiWithNewPredecessor()
API instead, which will invalidate the roots of the expression
instead.

Fixes https://github.com/llvm/llvm-project/issues/109333.

(cherry picked from commit 5bcc82d43388bb0daa122d5fe7ecda5eca27fc16)

Added: 
llvm/test/Transforms/LoopUnroll/pr109333.ll

Modified: 
llvm/lib/Transforms/Utils/LoopPeel.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp 
b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 5d7c0d947facc4..760f1619e030c3 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -859,7 +859,7 @@ static void cloneLoopBlocks(
   if (LatchInst && L->contains(LatchInst))
 LatchVal = VMap[LatchVal];
   PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first]));
-  SE.forgetValue(&PHI);
+  SE.forgetLcssaPhiWithNewPredecessor(L, &PHI);
 }
 
   // LastValueMap is updated with the values for the current loop

diff  --git a/llvm/test/Transforms/LoopUnroll/pr109333.ll 
b/llvm/test/Transforms/LoopUnroll/pr109333.ll
new file mode 100644
index 00..f7ac911a78207a
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/pr109333.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -S -passes="print,loop-unroll" -unroll-runtime < 
%s 2>/dev/null | FileCheck %s
+
+; Make sure we use %add.lcssa rather than %load when expanding the
+; backedge taken count.
+
+define void @test(i1 %c, ptr %p) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:br label %[[LOOP_1_PEEL_BEGIN:.*]]
+; CHECK:   [[LOOP_1_PEEL_BEGIN]]:
+; CHECK-NEXT:br label %[[LOOP_1_PEEL:.*]]
+; CHECK:   [[LOOP_1_PEEL]]:
+; CHECK-NEXT:[[LOAD_PEEL:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:[[ADD_PEEL:%.*]] = add i64 [[LOAD_PEEL]], 1
+; CHECK-NEXT:br i1 [[C]], label %[[IF:.*]], label %[[LOOP_1_PEEL_NEXT:.*]]
+; CHECK:   [[LOOP_1_PEEL_NEXT]]:
+; CHECK-NEXT:br label %[[LOOP_1_PEEL_NEXT1:.*]]
+; CHECK:   [[LOOP_1_PEEL_NEXT1]]:
+; CHECK-NEXT:br label %[[ENTRY_PEEL_NEWPH:.*]]
+; CHECK:   [[ENTRY_PEEL_NEWPH]]:
+; CHECK-NEXT:br label %[[LOOP_1:.*]]
+; CHECK:   [[LOOP_1]]:
+; CHECK-NEXT:[[LOAD:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:[[ADD:%.*]] = add i64 [[LOAD]], 1
+; CHECK-NEXT:br i1 [[C]], label %[[IF_LOOPEXIT:.*]], label %[[LOOP_1]], 
!llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:   [[IF_LOOPEXIT]]:
+; CHECK-NEXT:[[ADD_LCSSA_PH:%.*]] = phi i64 [ [[ADD]], %[[LOOP_1]] ]
+; CHECK-NEXT:br label %[[IF]]
+; CHECK:   [[IF]]:
+; CHECK-NEXT:[[ADD_LCSSA:%.*]] = phi i64 [ [[ADD_PEEL]], %[[LOOP_1_PEEL]] 
], [ [[ADD_LCSSA_PH]], %[[IF_LOOPEXIT]] ]
+; CHECK-NEXT:[[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[ADD_LCSSA]]
+; CHECK-NEXT:[[TMP0:%.*]] = shl i64 [[ADD_LCSSA]], 3
+; CHECK-NEXT:[[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; CHECK-NEXT:[[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:[[XTRAITER:%.*]] = and i64 [[TMP2]], 7
+; CHECK-NEXT:[[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:br i1 [[LCMP_MOD]], label %[[LOOP_2_PROL_PREHEADER:.*]], 
label %[[LOOP_2_PROL_LOOPEXIT:.*]]
+; CHECK:   [[LOOP_2_PROL_PREHEADER]]:
+; CHECK-NEXT:br label %[[LOOP_2_PROL:.*]]
+; CHECK:   [[LOOP_2_PROL]]:
+; CHECK-NEXT:[[IV_PROL:%.*]] = phi ptr [ [[P]], %[[LOOP_2_PROL_PREHEADER]] 
], [ [[IV_NEXT_PROL:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:[[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_2_PROL_PREHEADER]] 
], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:[[IV_NEXT_PROL]] = getelementptr i8, ptr [[IV_PROL]], i64 8
+; CHECK-NEXT:[[ICMP_PROL:%.*]] = icmp eq ptr [[IV_PROL]], [[GEP]]
+; CHECK-NEXT:[[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:[[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], 
[[XTRAITER]]
+; CHECK-NEXT:br i1 [[PROL_ITER_CMP]], label %[[LOOP_2_PROL]], label 
%[[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:   [[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT:[[IV_UNR_PH:%.*]] = phi ptr [ [[IV_NEXT_PROL]], 
%[[LOOP_2_PROL]] ]
+; CHECK-NEXT:br label %[[LOOP_2_PROL_LOOPEXIT]]
+; CHECK:   [[LOOP_2_PROL_LOOPEXIT]]:
+; CHECK-NEXT:[[IV_UNR:%.*]] = phi ptr [ [[P]], %[[IF]] ], [ [[IV_UNR_PH]]

[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)

2024-09-30 Thread via llvm-branch-commits

github-actions[bot] wrote:

@heiher (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109125
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 997b66e - [clang-scan-deps] Don't inspect Args[0] as an option (#109050)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Martin Storsjö
Date: 2024-10-01T08:53:03+02:00
New Revision: 997b66e566886b8a395b852db46e7930f757b818

URL: 
https://github.com/llvm/llvm-project/commit/997b66e566886b8a395b852db46e7930f757b818
DIFF: 
https://github.com/llvm/llvm-project/commit/997b66e566886b8a395b852db46e7930f757b818.diff

LOG: [clang-scan-deps] Don't inspect Args[0] as an option (#109050)

Since a26ec542371652e1d774696e90016fd5b0b1c191, we expand the executable
name to an absolute path, if it isn't already one, if found in path.

This broke a couple tests in some environments; when the clang workdir
resides in a path under e.g. /opt. Tests that only use a tool name like
"clang-cl" would get expanded to the absolute path in the build tree.
The loop for finding the last "-o" like option for clang-cl command
lines would inspect all arguments, including Args[0] which is the
executable name itself. As an /opt path matches Arg.starts_with("/o"),
this would get detected as an object file output name in cases where
there was no other explicit output argument.

Thus, this fixes those tests in workdirs under e.g. /opt.

(cherry picked from commit cead9044a995910306e2e64b426fcc8042d7e0ef)

Added: 


Modified: 
clang/tools/clang-scan-deps/ClangScanDeps.cpp

Removed: 




diff  --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp 
b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 0f581e73cdfe4b..867df19c863fe5 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -837,7 +837,12 @@ int clang_scan_deps_main(int argc, char **argv, const 
llvm::ToolContext &) {
 
   // Reverse scan, starting at the end or at the element before "--".
   auto R = std::make_reverse_iterator(FlagsEnd);
-  for (auto I = R, E = Args.rend(); I != E; ++I) {
+  auto E = Args.rend();
+  // Don't include Args[0] in the iteration; that's the executable, not
+  // an option.
+  if (E != R)
+E--;
+  for (auto I = R; I != E; ++I) {
 StringRef Arg = *I;
 if (ClangCLMode) {
   // Ignore arguments that are preceded by "-Xclang".



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang-scan-deps] Don't inspect Args[0] as an option (#109050) (PR #109865)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits
Martin =?utf-8?q?Storsj=C3=B6?= ,
Martin =?utf-8?q?Storsj=C3=B6?= ,
Martin =?utf-8?q?Storsj=C3=B6?= 
Message-ID:
In-Reply-To: 


https://github.com/tru closed https://github.com/llvm/llvm-project/pull/109865
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoopPeel] Fix LCSSA phi node invalidation (PR #109624)

2024-09-30 Thread via llvm-branch-commits

github-actions[bot] wrote:

@nikic (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109624
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoopPeel] Fix LCSSA phi node invalidation (PR #109624)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/109624
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 149bfdd - [Clang][CodeGen] Fix type for atomic float incdec operators (#107075)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Yingwei Zheng
Date: 2024-10-01T08:47:00+02:00
New Revision: 149bfdd61c961edbf49c2ea7fadf9d3c1a79a55e

URL: 
https://github.com/llvm/llvm-project/commit/149bfdd61c961edbf49c2ea7fadf9d3c1a79a55e
DIFF: 
https://github.com/llvm/llvm-project/commit/149bfdd61c961edbf49c2ea7fadf9d3c1a79a55e.diff

LOG: [Clang][CodeGen] Fix type for atomic float incdec operators (#107075)

`llvm::ConstantFP::get(llvm::LLVMContext&, APFloat(float))` always
returns a f32 constant.
Fix https://github.com/llvm/llvm-project/issues/107054.

Added: 


Modified: 
clang/lib/CodeGen/CGExprScalar.cpp
clang/test/CodeGen/X86/x86-atomic-double.c
clang/test/CodeGen/X86/x86-atomic-long_double.c

Removed: 




diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index a17d68424bbce5..6e212e74676e8d 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2833,18 +2833,22 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const 
UnaryOperator *E, LValue LV,
   
llvm::AtomicOrdering::SequentiallyConsistent);
   return isPre ? Builder.CreateBinOp(op, old, amt) : old;
 }
-// Special case for atomic increment/decrement on floats
+// Special case for atomic increment/decrement on floats.
+// Bail out non-power-of-2-sized floating point types (e.g., x86_fp80).
 if (type->isFloatingType()) {
-  llvm::AtomicRMWInst::BinOp aop =
-  isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
-  llvm::Instruction::BinaryOps op =
-  isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
-  llvm::Value *amt = llvm::ConstantFP::get(
-  VMContext, llvm::APFloat(static_cast<float>(1.0)));
-  llvm::Value *old =
-  Builder.CreateAtomicRMW(aop, LV.getAddress(), amt,
-  
llvm::AtomicOrdering::SequentiallyConsistent);
-  return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+  llvm::Type *Ty = ConvertType(type);
+  if (llvm::has_single_bit(Ty->getScalarSizeInBits())) {
+llvm::AtomicRMWInst::BinOp aop =
+isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
+llvm::Instruction::BinaryOps op =
+isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
+llvm::Value *amt = llvm::ConstantFP::get(Ty, 1.0);
+llvm::AtomicRMWInst *old = Builder.CreateAtomicRMW(
+aop, LV.getAddress(), amt,
+llvm::AtomicOrdering::SequentiallyConsistent);
+
+return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+  }
 }
 value = EmitLoadOfLValue(LV, E->getExprLoc());
 input = value;

diff  --git a/clang/test/CodeGen/X86/x86-atomic-double.c 
b/clang/test/CodeGen/X86/x86-atomic-double.c
index 2354c89cc2b170..09c8f70c3db854 100644
--- a/clang/test/CodeGen/X86/x86-atomic-double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-double.c
@@ -6,20 +6,14 @@
 // X64-LABEL: define dso_local double @test_double_post_inc(
 // X64-SAME: ) #[[ATTR0:[0-9]+]] {
 // X64-NEXT:  entry:
-// X64-NEXT:[[RETVAL:%.*]] = alloca double, align 8
-// X64-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, 
float 1.00e+00 seq_cst, align 8
-// X64-NEXT:store float [[TMP0]], ptr [[RETVAL]], align 8
-// X64-NEXT:[[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8
-// X64-NEXT:ret double [[TMP1]]
+// X64-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, 
double 1.00e+00 seq_cst, align 8
+// X64-NEXT:ret double [[TMP0]]
 //
 // X86-LABEL: define dso_local double @test_double_post_inc(
 // X86-SAME: ) #[[ATTR0:[0-9]+]] {
 // X86-NEXT:  entry:
-// X86-NEXT:[[RETVAL:%.*]] = alloca double, align 4
-// X86-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, 
float 1.00e+00 seq_cst, align 8
-// X86-NEXT:store float [[TMP0]], ptr [[RETVAL]], align 4
-// X86-NEXT:[[TMP1:%.*]] = load double, ptr [[RETVAL]], align 4
-// X86-NEXT:ret double [[TMP1]]
+// X86-NEXT:[[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, 
double 1.00e+00 seq_cst, align 8
+// X86-NEXT:ret double [[TMP0]]
 //
 double test_double_post_inc()
 {
@@ -30,20 +24,14 @@ double test_double_post_inc()
 // X64-LABEL: define dso_local double @test_double_post_dc(
 // X64-SAME: ) #[[ATTR0]] {
 // X64-NEXT:  entry:
-// X64-NEXT:[[RETVAL:%.*]] = alloca double, align 8
-// X64-NEXT:[[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, 
float 1.00e+00 seq_cst, align 8
-// X64-NEXT:store float [[TMP0]], ptr [[RETVAL]], align 8
-// X64-NEXT:[[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8
-// X64-NEXT:ret double [[TMP1]]
+// X64-NEXT:[[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, 
double 1.00e+00 seq_cst, align 8
+// X64-NEXT:ret double [[TMP0]]
 //
 // X86-LABEL: define dso_local double @test_doub

[llvm-branch-commits] [llvm] b3731b3 - [DAGCombiner] cache negative result from getMergeStoreCandidates() (#106949)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Princeton Ferro
Date: 2024-10-01T08:47:51+02:00
New Revision: b3731b36421e23737be2b4785700267b96c3241f

URL: 
https://github.com/llvm/llvm-project/commit/b3731b36421e23737be2b4785700267b96c3241f
DIFF: 
https://github.com/llvm/llvm-project/commit/b3731b36421e23737be2b4785700267b96c3241f.diff

LOG: [DAGCombiner] cache negative result from getMergeStoreCandidates() 
(#106949)

Cache negative search result from getStoreMergeCandidates() so that
mergeConsecutiveStores() does not iterate quadratically over a
potentially long sequence of unmergeable stores.

(cherry picked from commit 8f77d37f256809766fd83a09c6d144b785e9165a)

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 71cdec91e5f67a..7b1f1dc40211d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -191,6 +191,11 @@ namespace {
 // AA - Used for DAG load/store alias analysis.
 AliasAnalysis *AA;
 
+/// This caches all chains that have already been processed in
+/// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
+/// stores candidates.
+SmallPtrSet<SDNode *, 32> ChainsWithoutMergeableStores;
+
 /// When an instruction is simplified, add all users of the instruction to
 /// the work lists because they might get more simplified now.
 void AddUsersToWorklist(SDNode *N) {
@@ -776,11 +781,10 @@ namespace {
  bool UseTrunc);
 
 /// This is a helper function for mergeConsecutiveStores. Stores that
-/// potentially may be merged with St are placed in StoreNodes. RootNode is
-/// a chain predecessor to all store candidates.
-void getStoreMergeCandidates(StoreSDNode *St,
-                             SmallVectorImpl<MemOpLink> &StoreNodes,
-                             SDNode *&Root);
+/// potentially may be merged with St are placed in StoreNodes. On success,
+/// returns a chain predecessor to all store candidates.
+SDNode *getStoreMergeCandidates(StoreSDNode *St,
+                                SmallVectorImpl<MemOpLink> &StoreNodes);
 
 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
 /// have indirect dependency through their operands. RootNode is the
@@ -1782,6 +1786,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
 
 ++NodesCombined;
 
+// Invalidate cached info.
+ChainsWithoutMergeableStores.clear();
+
 // If we get back the same node we passed in, rather than a new node or
 // zero, we know that the node must have defined multiple values and
 // CombineTo was used.  Since CombineTo takes care of the worklist
@@ -20372,15 +20379,15 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
   return true;
 }
 
-void DAGCombiner::getStoreMergeCandidates(
-StoreSDNode *St, SmallVectorImpl &StoreNodes,
-SDNode *&RootNode) {
+SDNode *
+DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
+ SmallVectorImpl &StoreNodes) {
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer. We must have a base and an offset. Do not handle stores to undef
   // base pointers.
   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
-return;
+return nullptr;
 
   SDValue Val = peekThroughBitcasts(St->getValue());
   StoreSource StoreSrc = getStoreSource(Val);
@@ -20396,14 +20403,14 @@ void DAGCombiner::getStoreMergeCandidates(
 LoadVT = Ld->getMemoryVT();
 // Load and store should be the same type.
 if (MemVT != LoadVT)
-  return;
+  return nullptr;
 // Loads must only have one use.
 if (!Ld->hasNUsesOfValue(1, 0))
-  return;
+  return nullptr;
 // The memory operands must not be volatile/indexed/atomic.
 // TODO: May be able to relax for unordered atomics (see D66309)
 if (!Ld->isSimple() || Ld->isIndexed())
-  return;
+  return nullptr;
   }
   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
 int64_t &Offset) -> bool {
@@ -20471,6 +20478,27 @@ void DAGCombiner::getStoreMergeCandidates(
 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
   };
 
+  // We are looking for a root node which is an ancestor to all mergeable
+  // stores. We search up through a load, to our root and then down
+  // through all children. For instance we will find Store{1,2,3} if
+  // St is Store1, Store2, or Store3 where the root is not a load,
+  // which is always true for nonvolatile ops. TODO: Expand
+  // the search to find all valid candidates through multiple layers of loads.
+  //
+  // Root
+  // |-------|-------|
+  // Load    Load    Store3
+  // |       |
+  // Sto

[llvm-branch-commits] [clang] Backport "[Clang][CodeGen] Fix type for atomic float incdec operators (#107075)" (PR #107184)

2024-09-30 Thread via llvm-branch-commits

github-actions[bot] wrote:

@dtcxzyw (or anyone else). If you would like to add a note about this fix in 
the release notes (completely optional). Please reply to this comment with a 
one or two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/107184
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [DAGCombiner] cache negative result from getMergeStoreCandidates() (#106949) (PR #108397)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/108397

>From b3731b36421e23737be2b4785700267b96c3241f Mon Sep 17 00:00:00 2001
From: Princeton Ferro 
Date: Wed, 4 Sep 2024 07:18:53 -0700
Subject: [PATCH] [DAGCombiner] cache negative result from
 getMergeStoreCandidates() (#106949)

Cache negative search result from getStoreMergeCandidates() so that
mergeConsecutiveStores() does not iterate quadratically over a
potentially long sequence of unmergeable stores.

(cherry picked from commit 8f77d37f256809766fd83a09c6d144b785e9165a)
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 83 ---
 1 file changed, 51 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 71cdec91e5f67a..7b1f1dc40211d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -191,6 +191,11 @@ namespace {
 // AA - Used for DAG load/store alias analysis.
 AliasAnalysis *AA;
 
+/// This caches all chains that have already been processed in
+/// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
/// store candidates.
+SmallPtrSet ChainsWithoutMergeableStores;
+
 /// When an instruction is simplified, add all users of the instruction to
 /// the work lists because they might get more simplified now.
 void AddUsersToWorklist(SDNode *N) {
@@ -776,11 +781,10 @@ namespace {
  bool UseTrunc);
 
 /// This is a helper function for mergeConsecutiveStores. Stores that
-/// potentially may be merged with St are placed in StoreNodes. RootNode is
-/// a chain predecessor to all store candidates.
-void getStoreMergeCandidates(StoreSDNode *St,
- SmallVectorImpl &StoreNodes,
- SDNode *&Root);
+/// potentially may be merged with St are placed in StoreNodes. On success,
+/// returns a chain predecessor to all store candidates.
+SDNode *getStoreMergeCandidates(StoreSDNode *St,
+SmallVectorImpl &StoreNodes);
 
 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
 /// have indirect dependency through their operands. RootNode is the
@@ -1782,6 +1786,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
 
 ++NodesCombined;
 
+// Invalidate cached info.
+ChainsWithoutMergeableStores.clear();
+
 // If we get back the same node we passed in, rather than a new node or
 // zero, we know that the node must have defined multiple values and
 // CombineTo was used.  Since CombineTo takes care of the worklist
@@ -20372,15 +20379,15 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
   return true;
 }
 
-void DAGCombiner::getStoreMergeCandidates(
-StoreSDNode *St, SmallVectorImpl &StoreNodes,
-SDNode *&RootNode) {
+SDNode *
+DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
+ SmallVectorImpl &StoreNodes) {
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer. We must have a base and an offset. Do not handle stores to undef
   // base pointers.
   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
-return;
+return nullptr;
 
   SDValue Val = peekThroughBitcasts(St->getValue());
   StoreSource StoreSrc = getStoreSource(Val);
@@ -20396,14 +20403,14 @@ void DAGCombiner::getStoreMergeCandidates(
 LoadVT = Ld->getMemoryVT();
 // Load and store should be the same type.
 if (MemVT != LoadVT)
-  return;
+  return nullptr;
 // Loads must only have one use.
 if (!Ld->hasNUsesOfValue(1, 0))
-  return;
+  return nullptr;
 // The memory operands must not be volatile/indexed/atomic.
 // TODO: May be able to relax for unordered atomics (see D66309)
 if (!Ld->isSimple() || Ld->isIndexed())
-  return;
+  return nullptr;
   }
   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
 int64_t &Offset) -> bool {
@@ -20471,6 +20478,27 @@ void DAGCombiner::getStoreMergeCandidates(
 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
   };
 
+  // We are looking for a root node which is an ancestor to all mergeable
+  // stores. We search up through a load, to our root and then down
+  // through all children. For instance we will find Store{1,2,3} if
+  // St is Store1, Store2, or Store3 where the root is not a load,
+  // which is always true for nonvolatile ops. TODO: Expand
+  // the search to find all valid candidates through multiple layers of loads.
+  //
+  // Root
+  // |-------|-------|
+  // Load    Load    Store3
+  // |       |
+  // Store1  Store2
+  //
+  // FIXME: We should be able to climb and
+  // descend TokenFactors to find candidates as well.
+
+  S

[llvm-branch-commits] [llvm] release/19.x: [DAGCombiner] cache negative result from getMergeStoreCandidates() (#106949) (PR #108397)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/108397
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [DAGCombiner] cache negative result from getMergeStoreCandidates() (#106949) (PR #108397)

2024-09-30 Thread via llvm-branch-commits

github-actions[bot] wrote:

@nikic (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/108397
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] 8a25c60 - [libc++] Disable the clang-tidy checks to get CI back (#109989)

2024-09-30 Thread Louis Dionne via llvm-branch-commits

Author: Louis Dionne
Date: 2024-09-26T16:00:43-04:00
New Revision: 8a25c601eb64bcdb7c6c74bee52655468dfdd91b

URL: 
https://github.com/llvm/llvm-project/commit/8a25c601eb64bcdb7c6c74bee52655468dfdd91b
DIFF: 
https://github.com/llvm/llvm-project/commit/8a25c601eb64bcdb7c6c74bee52655468dfdd91b.diff

LOG: [libc++] Disable the clang-tidy checks to get CI back (#109989)

The CI has been a complete mess for the past week, and the only thing
preventing it from being back is the Clang tidy checks. Disable them (as
a total hack) to get CI back.

(cherry picked from commit 78c6506543dee13c9335edc5c85bc73c4853fbd7)

Added: 


Modified: 
libcxx/test/tools/clang_tidy_checks/CMakeLists.txt

Removed: 




diff  --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt 
b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
index f0289dc44c6625..125b2184a49ea6 100644
--- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
+++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt
@@ -1,3 +1,5 @@
+# TODO: Re-enable the tests once the CI is back under control
+return()
 
 # The find_package changes these variables. This leaves the build in an odd
 # state. Calling cmake a second time tries to write site config information in



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109920

>From 8679d1b51bd91d638ac3babba03a404e4031f9ea Mon Sep 17 00:00:00 2001
From: Timothy Pearson <162513562+tpearson-...@users.noreply.github.com>
Date: Wed, 25 Sep 2024 02:09:50 -0500
Subject: [PATCH] [SDAG] Honor signed arguments in floating point libcalls
 (#109134)

In ExpandFPLibCall, an assumption is made that all floating point
libcalls that take integer arguments use unsigned integers. In the case
of ldexp and frexp, this assumption is incorrect, leading to
miscompilation and subsequent target-dependent incorrect operation.

Indicate that ldexp and frexp utilize signed arguments in
ExpandFPLibCall.

Fixes #108904

Signed-off-by: Timothy Pearson 
(cherry picked from commit 90c14748638f1e10e31173b145fdbb5c4529c922)
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |  3 +-
 llvm/test/CodeGen/PowerPC/ldexp-libcall.ll|  4 +-
 llvm/test/CodeGen/PowerPC/ldexp.ll| 36 ++
 .../PowerPC/negative-integer-fp-libcall.ll| 26 +++
 .../X86/fold-int-pow2-with-fmul-or-fdiv.ll| 69 ---
 5 files changed, 96 insertions(+), 42 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7f5b46af01c62f..4b25f553ffae91 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2190,7 +2190,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
 Results.push_back(Tmp.first);
 Results.push_back(Tmp.second);
   } else {
-SDValue Tmp = ExpandLibCall(LC, Node, false).first;
+bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP;
+SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first;
 Results.push_back(Tmp);
   }
 }
diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll 
b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
index 6144a9d9203651..e531516c37e87e 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
@@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) {
 ; CHECK-NEXT:std r0, 48(r1)
 ; CHECK-NEXT:.cfi_def_cfa_offset 32
 ; CHECK-NEXT:.cfi_offset lr, 16
-; CHECK-NEXT:clrldi r4, r4, 32
+; CHECK-NEXT:extsw r4, r4
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:addi r1, r1, 32
@@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) {
 ; CHECK-NEXT:std r0, 48(r1)
 ; CHECK-NEXT:.cfi_def_cfa_offset 32
 ; CHECK-NEXT:.cfi_offset lr, 16
-; CHECK-NEXT:clrldi r4, r4, 32
+; CHECK-NEXT:extsw r4, r4
 ; CHECK-NEXT:bl ldexp
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:addi r1, r1, 32
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll 
b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 151df6096b30bd..ffc826cc86de59 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> 
%exp) {
 ; CHECK-NEXT:.cfi_offset v29, -48
 ; CHECK-NEXT:.cfi_offset v30, -32
 ; CHECK-NEXT:.cfi_offset v31, -16
-; CHECK-NEXT:xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:li r3, 0
+; CHECK-NEXT:xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:stxv v29, 32(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v3
+; CHECK-NEXT:vextuwrx r3, r3, v3
 ; CHECK-NEXT:stxv v30, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:vmr v31, v3
 ; CHECK-NEXT:vmr v30, v2
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxswapd vs0, v30
 ; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:xxswapd vs0, v30
 ; CHECK-NEXT:xscvdpspn v29, f1
 ; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v31
+; CHECK-NEXT:vextuwrx r3, r3, v31
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:xscvdpspn vs0, f1
@@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x 
i32> %exp) {
 ; CHECK-NEXT:.cfi_offset v29, -48
 ; CHECK-NEXT:.cfi_offset v30, -32
 ; CHECK-NEXT:.cfi_offset v31, -16
-; CHECK-NEXT:li r3, 12
-; CHECK-NEXT:xscvspdpn f1, v2
+; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:xxswapd vs0, v2
 ; CHECK-NEXT:stxv v28, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:xscvspdpn f1, vs0
+; CHECK-NEXT:vextuwrx r3, r3, v3
 ; CHECK-NEXT:stxv v29, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v30, 64(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v31, 80(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:vmr v31, v3
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:vmr v30, v2
-; CHECK-NEXT:vextuwrx r4, r3, v3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxswapd vs0, v30
-; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:

[llvm-branch-commits] [llvm] 8679d1b - [SDAG] Honor signed arguments in floating point libcalls (#109134)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

Author: Timothy Pearson
Date: 2024-10-01T08:55:02+02:00
New Revision: 8679d1b51bd91d638ac3babba03a404e4031f9ea

URL: 
https://github.com/llvm/llvm-project/commit/8679d1b51bd91d638ac3babba03a404e4031f9ea
DIFF: 
https://github.com/llvm/llvm-project/commit/8679d1b51bd91d638ac3babba03a404e4031f9ea.diff

LOG: [SDAG] Honor signed arguments in floating point libcalls (#109134)

In ExpandFPLibCall, an assumption is made that all floating point
libcalls that take integer arguments use unsigned integers. In the case
of ldexp and frexp, this assumption is incorrect, leading to
miscompilation and subsequent target-dependent incorrect operation.

Indicate that ldexp and frexp utilize signed arguments in
ExpandFPLibCall.

Fixes #108904

Signed-off-by: Timothy Pearson 
(cherry picked from commit 90c14748638f1e10e31173b145fdbb5c4529c922)

Added: 
llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll

Modified: 
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
llvm/test/CodeGen/PowerPC/ldexp.ll
llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7f5b46af01c62f..4b25f553ffae91 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2190,7 +2190,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
 Results.push_back(Tmp.first);
 Results.push_back(Tmp.second);
   } else {
-SDValue Tmp = ExpandLibCall(LC, Node, false).first;
+bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP;
+SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first;
 Results.push_back(Tmp);
   }
 }

diff  --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll 
b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
index 6144a9d9203651..e531516c37e87e 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll
@@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) {
 ; CHECK-NEXT:std r0, 48(r1)
 ; CHECK-NEXT:.cfi_def_cfa_offset 32
 ; CHECK-NEXT:.cfi_offset lr, 16
-; CHECK-NEXT:clrldi r4, r4, 32
+; CHECK-NEXT:extsw r4, r4
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:addi r1, r1, 32
@@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) {
 ; CHECK-NEXT:std r0, 48(r1)
 ; CHECK-NEXT:.cfi_def_cfa_offset 32
 ; CHECK-NEXT:.cfi_offset lr, 16
-; CHECK-NEXT:clrldi r4, r4, 32
+; CHECK-NEXT:extsw r4, r4
 ; CHECK-NEXT:bl ldexp
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:addi r1, r1, 32

diff  --git a/llvm/test/CodeGen/PowerPC/ldexp.ll 
b/llvm/test/CodeGen/PowerPC/ldexp.ll
index 151df6096b30bd..ffc826cc86de59 100644
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> 
%exp) {
 ; CHECK-NEXT:.cfi_offset v29, -48
 ; CHECK-NEXT:.cfi_offset v30, -32
 ; CHECK-NEXT:.cfi_offset v31, -16
-; CHECK-NEXT:xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:li r3, 0
+; CHECK-NEXT:xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:stxv v29, 32(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v3
+; CHECK-NEXT:vextuwrx r3, r3, v3
 ; CHECK-NEXT:stxv v30, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v31, 64(r1) # 16-byte Folded Spill
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:vmr v31, v3
 ; CHECK-NEXT:vmr v30, v2
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxswapd vs0, v30
 ; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:xxswapd vs0, v30
 ; CHECK-NEXT:xscvdpspn v29, f1
 ; CHECK-NEXT:xscvspdpn f1, vs0
-; CHECK-NEXT:vextuwrx r4, r3, v31
+; CHECK-NEXT:vextuwrx r3, r3, v31
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
 ; CHECK-NEXT:xscvdpspn vs0, f1
@@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x 
i32> %exp) {
 ; CHECK-NEXT:.cfi_offset v29, -48
 ; CHECK-NEXT:.cfi_offset v30, -32
 ; CHECK-NEXT:.cfi_offset v31, -16
-; CHECK-NEXT:li r3, 12
-; CHECK-NEXT:xscvspdpn f1, v2
+; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:xxswapd vs0, v2
 ; CHECK-NEXT:stxv v28, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:xscvspdpn f1, vs0
+; CHECK-NEXT:vextuwrx r3, r3, v3
 ; CHECK-NEXT:stxv v29, 48(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v30, 64(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:stxv v31, 80(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:vmr v31, v3
+; CHECK-NEXT:extsw r4, r3
 ; CHECK-NEXT:vmr v30, v2
-; CHECK-NEXT:vextuwrx r4, r3, v3
 ; CHECK-NEXT:bl ldexpf
 ; CHECK-NEXT:nop
-; CHECK-NEXT:xxswapd vs0, v30
-; CHECK-NEXT:li r3, 4
+; CHECK-NEXT:li r3, 12
 ; CHECK-NEXT:   

[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)

2024-09-30 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/109920
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [SDAG] Honor signed arguments in floating point libcalls (#109134) (PR #109920)

2024-09-30 Thread via llvm-branch-commits

github-actions[bot] wrote:

@nikic (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109920
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits