[llvm-branch-commits] [compiler-rt] release/19.x: [compiler-rt] Remove SHA2 interceptions for NetBSD/Free… (PR #111725)

2024-10-09 Thread David CARLIER via llvm-branch-commits

https://github.com/devnexen created 
https://github.com/llvm/llvm-project/pull/111725

…BSD. (#110246)

For backport #111724

To Fix #110215

Interceptors introduced with 18a7ebda99044473fdbce6376993714ff54e6690

>From cb632327b87e78506d6d91d1d7244e5a5f5c87e6 Mon Sep 17 00:00:00 2001
From: David CARLIER 
Date: Wed, 9 Oct 2024 05:47:00 +0100
Subject: [PATCH] release/19.x: [compiler-rt] Remove SHA2 interceptions for
 NetBSD/FreeBSD. (#110246)

To Fix #110215

Interceptors introduced with 18a7ebda99044473fdbce6376993714ff54e6690
---
 .../sanitizer_common_interceptors.inc | 180 ---
 .../sanitizer_platform_interceptors.h |   2 -
 .../TestCases/FreeBSD/md5.cpp | 119 --
 .../TestCases/FreeBSD/sha2.cpp| 214 --
 .../sanitizer_common/TestCases/NetBSD/md5.cpp | 114 --
 .../TestCases/NetBSD/sha2.cpp | 206 -
 6 files changed, 835 deletions(-)
 delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/FreeBSD/md5.cpp
 delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/FreeBSD/sha2.cpp
 delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/NetBSD/md5.cpp
 delete mode 100644 compiler-rt/test/sanitizer_common/TestCases/NetBSD/sha2.cpp

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc 
b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 49c9dcbef358ff..7a7af7936af315 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -8823,83 +8823,6 @@ INTERCEPTOR(char *, RMD160Data, u8 *data, SIZE_T len, 
char *buf) {
 #define INIT_RMD160
 #endif
 
-#if SANITIZER_INTERCEPT_MD5
-INTERCEPTOR(void, MD5Init, void *context) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Init, context);
-  REAL(MD5Init)(context);
-  if (context)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
-}
-
-INTERCEPTOR(void, MD5Update, void *context, const unsigned char *data,
-unsigned int len) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Update, context, data, len);
-  if (data && len > 0)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
-  if (context)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  REAL(MD5Update)(context, data, len);
-  if (context)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
-}
-
-INTERCEPTOR(void, MD5Final, unsigned char digest[16], void *context) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Final, digest, context);
-  if (context)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  REAL(MD5Final)(digest, context);
-  if (digest)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16);
-}
-
-INTERCEPTOR(char *, MD5End, void *context, char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5End, context, buf);
-  if (context)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  char *ret = REAL(MD5End)(context, buf);
-  if (ret)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-INTERCEPTOR(char *, MD5File, const char *filename, char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5File, filename, buf);
-  if (filename)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 
1);
-  char *ret = REAL(MD5File)(filename, buf);
-  if (ret)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-INTERCEPTOR(char *, MD5Data, const unsigned char *data, unsigned int len,
-char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Data, data, len, buf);
-  if (data && len > 0)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
-  char *ret = REAL(MD5Data)(data, len, buf);
-  if (ret)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-#define INIT_MD5   
\
-  COMMON_INTERCEPT_FUNCTION(MD5Init);  
\
-  COMMON_INTERCEPT_FUNCTION(MD5Update);
\
-  COMMON_INTERCEPT_FUNCTION(MD5Final); 
\
-  COMMON_INTERCEPT_FUNCTION(MD5End);   
\
-  COMMON_INTERCEPT_FUNCTION(MD5File);  
\
-  COMMON_INTERCEPT_FUNCTION(MD5Data)
-#else
-#define INIT_MD5
-#endif
-
 #if SANITIZER_INTERCEPT_FSEEK
 INTERCEPTOR(int, fseek, __sanitizer_FILE *stream, long int offset, int whence) 
{
   void *ctx;
@@ -9030,107 +8953,6 @@ INTERCEPTOR(char *, MD2Data, const unsigned char *data, 
unsigned int len,
 #define INIT_MD2
 #endif
 
-#if SANITIZER_INTERCEPT_SHA2
-#define SHA2_INTERCEPTORS(LEN, SHA2_STATE_T) \
-  INTERCEPTOR(void, SHA##LEN##_Init, void *context) { \
-void *ctx; \
-COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Init, context); \
-REAL(SHA#

[llvm-branch-commits] [compiler-rt] release/19.x: [compiler-rt] Remove SHA2 interceptions for NetBSD/Free… (PR #111725)

2024-10-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: David CARLIER (devnexen)


Changes

…BSD. (#110246)

For backport #111724

To Fix #110215

Interceptors introduced with 18a7ebda99044473fdbce6376993714ff54e6690

---

Patch is 28.40 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/111725.diff


6 Files Affected:

- (modified) compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc 
(-180) 
- (modified) compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h 
(-2) 
- (removed) compiler-rt/test/sanitizer_common/TestCases/FreeBSD/md5.cpp (-119) 
- (removed) compiler-rt/test/sanitizer_common/TestCases/FreeBSD/sha2.cpp (-214) 
- (removed) compiler-rt/test/sanitizer_common/TestCases/NetBSD/md5.cpp (-114) 
- (removed) compiler-rt/test/sanitizer_common/TestCases/NetBSD/sha2.cpp (-206) 


``diff
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc 
b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 49c9dcbef358ff..7a7af7936af315 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -8823,83 +8823,6 @@ INTERCEPTOR(char *, RMD160Data, u8 *data, SIZE_T len, 
char *buf) {
 #define INIT_RMD160
 #endif
 
-#if SANITIZER_INTERCEPT_MD5
-INTERCEPTOR(void, MD5Init, void *context) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Init, context);
-  REAL(MD5Init)(context);
-  if (context)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
-}
-
-INTERCEPTOR(void, MD5Update, void *context, const unsigned char *data,
-unsigned int len) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Update, context, data, len);
-  if (data && len > 0)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
-  if (context)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  REAL(MD5Update)(context, data, len);
-  if (context)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
-}
-
-INTERCEPTOR(void, MD5Final, unsigned char digest[16], void *context) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Final, digest, context);
-  if (context)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  REAL(MD5Final)(digest, context);
-  if (digest)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16);
-}
-
-INTERCEPTOR(char *, MD5End, void *context, char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5End, context, buf);
-  if (context)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
-  char *ret = REAL(MD5End)(context, buf);
-  if (ret)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-INTERCEPTOR(char *, MD5File, const char *filename, char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5File, filename, buf);
-  if (filename)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 
1);
-  char *ret = REAL(MD5File)(filename, buf);
-  if (ret)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-INTERCEPTOR(char *, MD5Data, const unsigned char *data, unsigned int len,
-char *buf) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, MD5Data, data, len, buf);
-  if (data && len > 0)
-COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
-  char *ret = REAL(MD5Data)(data, len, buf);
-  if (ret)
-COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
-  return ret;
-}
-
-#define INIT_MD5   
\
-  COMMON_INTERCEPT_FUNCTION(MD5Init);  
\
-  COMMON_INTERCEPT_FUNCTION(MD5Update);
\
-  COMMON_INTERCEPT_FUNCTION(MD5Final); 
\
-  COMMON_INTERCEPT_FUNCTION(MD5End);   
\
-  COMMON_INTERCEPT_FUNCTION(MD5File);  
\
-  COMMON_INTERCEPT_FUNCTION(MD5Data)
-#else
-#define INIT_MD5
-#endif
-
 #if SANITIZER_INTERCEPT_FSEEK
 INTERCEPTOR(int, fseek, __sanitizer_FILE *stream, long int offset, int whence) 
{
   void *ctx;
@@ -9030,107 +8953,6 @@ INTERCEPTOR(char *, MD2Data, const unsigned char *data, 
unsigned int len,
 #define INIT_MD2
 #endif
 
-#if SANITIZER_INTERCEPT_SHA2
-#define SHA2_INTERCEPTORS(LEN, SHA2_STATE_T) \
-  INTERCEPTOR(void, SHA##LEN##_Init, void *context) { \
-void *ctx; \
-COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Init, context); \
-REAL(SHA##LEN##_Init)(context); \
-if (context) \
-  COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
-  } \
-  INTERCEPTOR(void, SHA##LEN##_Update, void *context, \
-  const u8 *data, SIZE_T len) { \
-void *ctx; \
-COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Update, context, data, len); \
-if (data && len > 0) \
-  COMMON_INTERCEPTOR_READ_RANGE(ctx, d

[llvm-branch-commits] [llvm] cee0873 - Revert "[SandboxVectorizer] Use sbvec-passes flag to create a pipeline of Reg…"

2024-10-09 Thread via llvm-branch-commits

Author: Jorge Gorbe Moya
Date: 2024-10-09T10:44:19-07:00
New Revision: cee0873fc83beb441a27b1f0555da54e56aa0910

URL: 
https://github.com/llvm/llvm-project/commit/cee0873fc83beb441a27b1f0555da54e56aa0910
DIFF: 
https://github.com/llvm/llvm-project/commit/cee0873fc83beb441a27b1f0555da54e56aa0910.diff

LOG: Revert "[SandboxVectorizer] Use sbvec-passes flag to create a pipeline of 
Reg…"

This reverts commit 10ada4ae738b9d93174e516ca841e61a8f4fd612.

Added: 


Modified: 
llvm/include/llvm/SandboxIR/PassManager.h

llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h
llvm/lib/SandboxIR/PassManager.cpp
llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
llvm/test/Transforms/SandboxVectorizer/default_pass_pipeline.ll
llvm/test/Transforms/SandboxVectorizer/user_pass_pipeline.ll
llvm/unittests/SandboxIR/PassTest.cpp

Removed: 
llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h
llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def



diff  --git a/llvm/include/llvm/SandboxIR/PassManager.h 
b/llvm/include/llvm/SandboxIR/PassManager.h
index 247c43615f5766..54192c6bf1333b 100644
--- a/llvm/include/llvm/SandboxIR/PassManager.h
+++ b/llvm/include/llvm/SandboxIR/PassManager.h
@@ -18,8 +18,6 @@
 #ifndef LLVM_SANDBOXIR_PASSMANAGER_H
 #define LLVM_SANDBOXIR_PASSMANAGER_H
 
-#include 
-
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/SandboxIR/Pass.h"
@@ -34,65 +32,25 @@ template 
 class PassManager : public ParentPass {
 protected:
   /// The list of passes that this pass manager will run.
-  SmallVector> Passes;
+  SmallVector Passes;
 
   PassManager(StringRef Name) : ParentPass(Name) {}
   PassManager(const PassManager &) = delete;
-  PassManager(PassManager &&) = default;
   virtual ~PassManager() = default;
   PassManager &operator=(const PassManager &) = delete;
 
 public:
   /// Adds \p Pass to the pass pipeline.
-  void addPass(std::unique_ptr Pass) {
+  void addPass(ContainedPass *Pass) {
 // TODO: Check that Pass's class type works with this PassManager type.
-Passes.push_back(std::move(Pass));
-  }
-
-  using CreatePassFunc =
-  std::function(StringRef)>;
-
-  /// Parses \p Pipeline as a comma-separated sequence of pass names and sets
-  /// the pass pipeline, using \p CreatePass to instantiate passes by name.
-  ///
-  /// After calling this function, the PassManager contains only the specified
-  /// pipeline, any previously added passes are cleared.
-  void setPassPipeline(StringRef Pipeline, CreatePassFunc CreatePass) {
-static constexpr const char EndToken = '\0';
-static constexpr const char PassDelimToken = ',';
-
-assert(Passes.empty() &&
-   "setPassPipeline called on a non-empty sandboxir::PassManager");
-// Add EndToken to the end to ease parsing.
-std::string PipelineStr = std::string(Pipeline) + EndToken;
-int FlagBeginIdx = 0;
-
-for (auto [Idx, C] : enumerate(PipelineStr)) {
-  // Keep moving Idx until we find the end of the pass name.
-  bool FoundDelim = C == EndToken || C == PassDelimToken;
-  if (!FoundDelim)
-continue;
-  unsigned Sz = Idx - FlagBeginIdx;
-  std::string PassName(&PipelineStr[FlagBeginIdx], Sz);
-  FlagBeginIdx = Idx + 1;
-
-  // Get the pass that corresponds to PassName and add it to the pass
-  // manager.
-  auto Pass = CreatePass(PassName);
-  if (Pass == nullptr) {
-errs() << "Pass '" << PassName << "' not registered!\n";
-exit(1);
-  }
-  addPass(std::move(Pass));
-}
+Passes.push_back(Pass);
   }
-
 #ifndef NDEBUG
   void print(raw_ostream &OS) const override {
 OS << this->getName();
 OS << "(";
 // TODO: This should call Pass->print(OS) because Pass may be a PM.
-interleave(Passes, OS, [&OS](auto &Pass) { OS << Pass->getName(); }, ",");
+interleave(Passes, OS, [&OS](auto *Pass) { OS << Pass->getName(); }, ",");
 OS << ")";
   }
   LLVM_DUMP_METHOD void dump() const override {
@@ -121,6 +79,38 @@ class RegionPassManager final : public 
PassManager {
   bool runOnRegion(Region &R) final;
 };
 
+/// Owns the passes and provides an API to get a pass by its name.
+class PassRegistry {
+  SmallVector, 8> Passes;
+  DenseMap NameToPassMap;
+
+public:
+  static constexpr const char PassDelimToken = ',';
+  PassRegistry() = default;
+  /// Registers \p PassPtr and takes ownership.
+  Pass ®isterPass(std::unique_ptr &&PassPtr) {
+auto &PassRef = *PassPtr.get();
+NameToPassMap[PassRef.getName()] = &PassRef;
+Passes.push_back(std::move(PassPtr));
+return PassRef;
+  }
+  /// \Returns the pass with name \p Name, or null if not regi

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Handle atomic sextload and zextload (PR #111721)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> Missing test for buffer loads?

Those are the gfx7 global cases. There aren't any atomic buffer load intrinsics 

https://github.com/llvm/llvm-project/pull/111721
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] CWG2398: improve overload resolution backwards compat (PR #107350)

2024-10-09 Thread via llvm-branch-commits

https://github.com/cor3ntin approved this pull request.


https://github.com/llvm/llvm-project/pull/107350
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [Clang] Fix 'counted_by' for nested struct pointers (#110497) (PR #111445)

2024-10-09 Thread Eli Friedman via llvm-branch-commits

https://github.com/efriedma-quic approved this pull request.

LGTM

This should be safe to merge: it only affects usage of the new counted_by 
attribute, and this fixes a significant bug blocking usage of that feature.

https://github.com/llvm/llvm-project/pull/111445
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AtomicExpand: Copy metadata from atomicrmw to cmpxchg (PR #109409)

2024-10-09 Thread Eli Friedman via llvm-branch-commits

efriedma-quic wrote:

Did you address 
https://github.com/llvm/llvm-project/pull/109409#pullrequestreview-2332197449 ?

https://github.com/llvm/llvm-project/pull/109409
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> The original test files look unaffected? https://reviews.llvm.org/D85966

I don't think AArch64 will pass the legality check for the atomic sextload

https://github.com/llvm/llvm-project/pull/111730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Handle atomic sextload and zextload (PR #111721)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> But patch adds several MUBUF_Pseudo_Load_Pats which are not covered by tests?

The only cases that might have missing coverage is extend to 16-bit register 
cases. In the DAG we didn't have legal 16-bit types on gfx6/7, but we could 
handle the loads here 



https://github.com/llvm/llvm-project/pull/111721
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [Flang] Move runtime library files to FortranRuntime. NFC (PR #110298)

2024-10-09 Thread Peter Klausler via llvm-branch-commits

klausler wrote:

> While it would be possible to only have a `FortranRuntime/CMakeLists.txt` and 
> leave all the other files in `flang/`, I strongly believe this is a bad idea. 
> It is the norm for LLVM runtimes to have separate sources.

A Fortran compiler and its runtime are tightly coupled; the compiler can only 
work with this runtime, this runtime can only be used by this compiler, and 
they share common API definitions and data structures.

I don't mind if you move the sources and headers that are used *only* by the 
runtime into a new top-level directory, if you must.  But moving common headers 
and sources used by *both* the compiler and the runtime builds out of the 
compiler tree makes no sense to me.


https://github.com/llvm/llvm-project/pull/110298
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] CWG2398: improve overload resolution backwards compat (PR #107350)

2024-10-09 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov edited 
https://github.com/llvm/llvm-project/pull/107350
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> Standard question: Could you add/extend a mir file for showing the different 
> cases and should the erase be conditional on the type?

The type doesn't matter. The original load always has to be removed. This is 
only done for hasOneUse anyway 

https://github.com/llvm/llvm-project/pull/111730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread Thorsten Schütt via llvm-branch-commits

tschuett wrote:

I meant: if atomic ...

https://github.com/llvm/llvm-project/pull/111730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread Thorsten Schütt via llvm-branch-commits

tschuett wrote:

The duplicating is only due to atomicity of the load?
https://github.com/llvm/llvm-project/blob/4605ba0437728ecf8233ba6dbb52ffba30a22743/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir#L17


Then I vote for:

```
if (atomic)
  EraseFromParent();
```
for documentation.

https://github.com/llvm/llvm-project/pull/111730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> The duplicating is only due to atomicity of the load?

The duplicating is the apparent effect because the non-atomic load can be 
deleted. There's no plus to keeping it around 

> if (atomic)
>   EraseFromParent();
> ```

This just adds extra work for later code to delete. The zextload equivalent 
combine already directly deletes the load 



https://github.com/llvm/llvm-project/pull/111730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)

2024-10-09 Thread Craig Topper via llvm-branch-commits

topperc wrote:

> Ping.
> 
> And some updates on vector support: currently, `ExpandMemCmp` will only 
> generate integer types (`i128`, `i256` and so on). So, if we simply add 
> `128`, `256`, `vlen` to `LoadSizes`, the LLVM IR will use `i128`/`i256`/... 
> and then they are expanded to legal scalar types as we don't mark 
> `i128`/`i256`/... as legal when RVV exists.
> 
> There are two ways to fix this:
> 
> 1. Make `ExpandMemCmp` generate vector types/operations.
> 2. Make `i128`/`i256`/... legal on RISC-V.
> 
> I think the first one is the right approach but I need some agreements on 
> this.

I think X86 only supports `IsZeroCmp` and has a pre-type legalization DAG 
combine to recognize the wide integer type. See X86ISelLowering 
`combineVectorSizedSetCCEquality`.

General memcmp is complicated with vector. You need to find the first element 
where the mismatch occurred and then compare only that element to find whether 
it is larger or smaller. I don't know if you can write that in target 
independent IR without it being really nasty and it wouldn't generate efficient 
code.

https://github.com/llvm/llvm-project/pull/107548
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Handle atomic sextload and zextload (PR #111721)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits


@@ -0,0 +1,331 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | 
FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck 
-check-prefixes=GCN,GFX8 %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | 
FileCheck -check-prefixes=GCN,GFX9 %s
+
+define i8 @atomic_load_flat_monotonic_i8(ptr %ptr) {
+; GCN-LABEL: atomic_load_flat_monotonic_i8:
+; GCN:   ; %bb.0:
+; GCN-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:flat_load_ubyte v0, v[0:1] glc
+; GCN-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:s_setpc_b64 s[30:31]
+  %load = load atomic i8, ptr %ptr monotonic, align 1
+  ret i8 %load
+}
+
+define i32 @atomic_load_flat_monotonic_i8_zext_to_i32(ptr %ptr) {
+; GCN-LABEL: atomic_load_flat_monotonic_i8_zext_to_i32:
+; GCN:   ; %bb.0:
+; GCN-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:flat_load_ubyte v0, v[0:1] glc
+; GCN-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:s_setpc_b64 s[30:31]
+  %load = load atomic i8, ptr %ptr monotonic, align 1
+  %ext = zext i8 %load to i32
+  ret i32 %ext
+}
+
+define i32 @atomic_load_flat_monotonic_i8_sext_to_i32(ptr %ptr) {
+; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
+; GFX7:   ; %bb.0:
+; GFX7-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:flat_load_sbyte v2, v[0:1] glc
+; GFX7-NEXT:flat_load_ubyte v0, v[0:1] glc
+; GFX7-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX7-NEXT:v_mov_b32_e32 v0, v2
+; GFX7-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
+; GFX8:   ; %bb.0:
+; GFX8-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:flat_load_sbyte v2, v[0:1] glc
+; GFX8-NEXT:flat_load_ubyte v0, v[0:1] glc
+; GFX8-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX8-NEXT:v_mov_b32_e32 v0, v2
+; GFX8-NEXT:s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
+; GFX9:   ; %bb.0:
+; GFX9-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:flat_load_sbyte v2, v[0:1] glc

arsenm wrote:

There's a bug in amdgpu-postlegalizer-combiner, so it's not from the selection 

https://github.com/llvm/llvm-project/pull/111721
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)

2024-10-09 Thread Craig Topper via llvm-branch-commits


@@ -1144,42 +2872,116 @@ entry:
 define i32 @memcmp_size_4(ptr %s1, ptr %s2) nounwind {
 ; CHECK-ALIGNED-RV32-LABEL: memcmp_size_4:
 ; CHECK-ALIGNED-RV32:   # %bb.0: # %entry
-; CHECK-ALIGNED-RV32-NEXT:addi sp, sp, -16
-; CHECK-ALIGNED-RV32-NEXT:sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-ALIGNED-RV32-NEXT:li a2, 4
-; CHECK-ALIGNED-RV32-NEXT:call memcmp
-; CHECK-ALIGNED-RV32-NEXT:lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-ALIGNED-RV32-NEXT:addi sp, sp, 16
+; CHECK-ALIGNED-RV32-NEXT:lbu a2, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a3, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a4, 3(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a0, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a5, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:lbu a6, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:lbu a7, 3(a1)
+; CHECK-ALIGNED-RV32-NEXT:lbu a1, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT:slli a0, a0, 8
+; CHECK-ALIGNED-RV32-NEXT:or a0, a0, a4
+; CHECK-ALIGNED-RV32-NEXT:slli a3, a3, 16
+; CHECK-ALIGNED-RV32-NEXT:slli a2, a2, 24
+; CHECK-ALIGNED-RV32-NEXT:or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:or a0, a2, a0
+; CHECK-ALIGNED-RV32-NEXT:slli a1, a1, 8
+; CHECK-ALIGNED-RV32-NEXT:or a1, a1, a7
+; CHECK-ALIGNED-RV32-NEXT:slli a6, a6, 16
+; CHECK-ALIGNED-RV32-NEXT:slli a5, a5, 24
+; CHECK-ALIGNED-RV32-NEXT:or a2, a5, a6
+; CHECK-ALIGNED-RV32-NEXT:or a1, a2, a1
+; CHECK-ALIGNED-RV32-NEXT:sltu a2, a1, a0
+; CHECK-ALIGNED-RV32-NEXT:sltu a0, a0, a1
+; CHECK-ALIGNED-RV32-NEXT:sub a0, a2, a0
 ; CHECK-ALIGNED-RV32-NEXT:ret
 ;
 ; CHECK-ALIGNED-RV64-LABEL: memcmp_size_4:
 ; CHECK-ALIGNED-RV64:   # %bb.0: # %entry
-; CHECK-ALIGNED-RV64-NEXT:addi sp, sp, -16
-; CHECK-ALIGNED-RV64-NEXT:sd ra, 8(sp) # 8-byte Folded Spill
-; CHECK-ALIGNED-RV64-NEXT:li a2, 4
-; CHECK-ALIGNED-RV64-NEXT:call memcmp
-; CHECK-ALIGNED-RV64-NEXT:ld ra, 8(sp) # 8-byte Folded Reload
-; CHECK-ALIGNED-RV64-NEXT:addi sp, sp, 16
+; CHECK-ALIGNED-RV64-NEXT:lbu a2, 0(a0)
+; CHECK-ALIGNED-RV64-NEXT:lbu a3, 1(a0)
+; CHECK-ALIGNED-RV64-NEXT:lbu a4, 2(a0)
+; CHECK-ALIGNED-RV64-NEXT:lb a0, 3(a0)
+; CHECK-ALIGNED-RV64-NEXT:lbu a5, 0(a1)
+; CHECK-ALIGNED-RV64-NEXT:lbu a6, 1(a1)
+; CHECK-ALIGNED-RV64-NEXT:lbu a7, 2(a1)
+; CHECK-ALIGNED-RV64-NEXT:lb a1, 3(a1)
+; CHECK-ALIGNED-RV64-NEXT:andi a0, a0, 255
+; CHECK-ALIGNED-RV64-NEXT:slli a4, a4, 8
+; CHECK-ALIGNED-RV64-NEXT:or a0, a4, a0
+; CHECK-ALIGNED-RV64-NEXT:slli a3, a3, 16
+; CHECK-ALIGNED-RV64-NEXT:slliw a2, a2, 24
+; CHECK-ALIGNED-RV64-NEXT:or a2, a2, a3
+; CHECK-ALIGNED-RV64-NEXT:or a0, a2, a0
+; CHECK-ALIGNED-RV64-NEXT:andi a1, a1, 255
+; CHECK-ALIGNED-RV64-NEXT:slli a7, a7, 8
+; CHECK-ALIGNED-RV64-NEXT:or a1, a7, a1
+; CHECK-ALIGNED-RV64-NEXT:slli a6, a6, 16
+; CHECK-ALIGNED-RV64-NEXT:slliw a2, a5, 24
+; CHECK-ALIGNED-RV64-NEXT:or a2, a2, a6
+; CHECK-ALIGNED-RV64-NEXT:or a1, a2, a1
+; CHECK-ALIGNED-RV64-NEXT:sltu a2, a1, a0
+; CHECK-ALIGNED-RV64-NEXT:sltu a0, a0, a1
+; CHECK-ALIGNED-RV64-NEXT:sub a0, a2, a0
 ; CHECK-ALIGNED-RV64-NEXT:ret
 ;
 ; CHECK-UNALIGNED-RV32-LABEL: memcmp_size_4:
 ; CHECK-UNALIGNED-RV32:   # %bb.0: # %entry
-; CHECK-UNALIGNED-RV32-NEXT:addi sp, sp, -16
-; CHECK-UNALIGNED-RV32-NEXT:sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-UNALIGNED-RV32-NEXT:li a2, 4
-; CHECK-UNALIGNED-RV32-NEXT:call memcmp
-; CHECK-UNALIGNED-RV32-NEXT:lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-UNALIGNED-RV32-NEXT:addi sp, sp, 16
+; CHECK-UNALIGNED-RV32-NEXT:lw a0, 0(a0)

topperc wrote:

Add a RUN line with Zbb/Zbkb?

Maybe we should restrict MemCmp expansion to only when Zbb/Zbkb are enabled?

https://github.com/llvm/llvm-project/pull/107548
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Handle atomic sextload and zextload (PR #111721)

2024-10-09 Thread Stanislav Mekhanoshin via llvm-branch-commits

rampitec wrote:

> > Missing test for buffer loads?
> 
> Those are the gfx7 global cases. There aren't any atomic buffer load 
> intrinsics

But patch adds several MUBUF_Pseudo_Load_Pats which are not covered by tests?

https://github.com/llvm/llvm-project/pull/111721
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Handle atomic sextload and zextload (PR #111721)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/111721

>From 81dad077b21128cfc827ffb18b12631407a2bde4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 9 Oct 2024 19:35:29 +0400
Subject: [PATCH] AMDGPU/GlobalISel: Handle atomic sextload and zextload

Atomic loads are handled differently from the DAG, and have separate opcodes
and explicit control over the extensions, like ordinary loads. Add
new patterns for these.

There's room for cleanup and improvement. d16 cases aren't handled.

Fixes #111645
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   2 +
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |  21 +
 llvm/lib/Target/AMDGPU/BUFInstructions.td |   7 +
 llvm/lib/Target/AMDGPU/DSInstructions.td  |   7 +
 llvm/lib/Target/AMDGPU/FLATInstructions.td|  16 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |  45 ++
 .../AMDGPU/GlobalISel/atomic_load_flat.ll | 331 
 .../AMDGPU/GlobalISel/atomic_load_global.ll   | 478 
 .../AMDGPU/GlobalISel/atomic_load_local_2.ll  | 509 ++
 9 files changed, 1416 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 278d3536add916..d348f489d95dd3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -207,6 +207,8 @@ def : GINodeEquiv {
 
 def : GINodeEquiv {
   bit CheckMMOIsAtomic = 1;
+  let IfSignExtend = G_SEXTLOAD;
+  let IfZeroExtend = G_ZEXTLOAD;
 }
 
 def : GINodeEquiv {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 09987a6504b9d0..7816ae911312c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -521,6 +521,27 @@ def atomic_load_64_#as : PatFrag<(ops node:$ptr), 
(atomic_load_64 node:$ptr)> {
   let IsAtomic = 1;
   let MemoryVT = i64;
 }
+
+def atomic_load_zext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_zext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i8;
+}
+
+def atomic_load_sext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_sext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i8;
+}
+
+def atomic_load_zext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_zext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i16;
+}
+
+def atomic_load_sext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_sext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i16;
+}
+
 } // End let AddressSpaces
 } // End foreach as
 
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td 
b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 6bdff9862e55ac..4ed0f9ade871f1 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -983,15 +983,22 @@ defm BUFFER_LOAD_LDS_U16 : MUBUF_Pseudo_Loads_LDSOpc <
 >;
 
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, 
atomic_load_zext_8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, 
atomic_load_16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, 
atomic_load_zext_16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, atomic_load_8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, 
atomic_load_16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, 
atomic_load_zext_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, 
atomic_load_zext_16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, 
atomic_load_sext_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, 
atomic_load_sext_16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, 
atomic_load_sext_16_global>;
 
 foreach vt = Reg32Types.types in {
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", vt, load_global>;
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td 
b/llvm/lib/Target/AMDGPU/DSInstructions.td
index e9283fde85a48d..7724821bbd7c36 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -795,12 +795,19 @@ defm : DSReadPat_mc ;
 
 defm : DSReadPat_mc ;
 defm : DSReadPat_mc ;
+defm : DSReadPat_mc ;
+defm : DSReadPat_mc ;
+defm : DSReadPat_mc ;
+defm : DSReadPat_mc ;

[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/111730

The sext_inreg (load) combine was not deleting the old load instruction,
and it would never be deleted if volatile or atomic.

>From 5a23c2797ae59eb493fb9804ec32a1b8dc7755b2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 9 Oct 2024 22:05:48 +0400
Subject: [PATCH] GlobalISel: Fix combine duplicating atomic loads

The sext_inreg (load) combine was not deleting the old load instruction,
and it would never be deleted if volatile or atomic.
---
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  1 +
 .../AMDGPU/GlobalISel/atomic_load_flat.ll | 96 ---
 .../AMDGPU/GlobalISel/atomic_load_global.ll   | 39 ++--
 .../AMDGPU/GlobalISel/atomic_load_local_2.ll  | 36 ++-
 ...lizer-combiner-sextload-from-sextinreg.mir |  2 -
 5 files changed, 37 insertions(+), 137 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp 
b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 14e94d48bf8362..535c827f6a8223 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1110,6 +1110,7 @@ void CombinerHelper::applySextInRegOfLoad(
   Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
  LoadDef->getPointerReg(), *NewMMO);
   MI.eraseFromParent();
+  LoadDef->eraseFromParent();
 }
 
 /// Return true if 'MI' is a load or a store that may be fold it's address
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
index 788fb04e842b4e..fc3bc09cf8e3e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
@@ -27,32 +27,12 @@ define i32 @atomic_load_flat_monotonic_i8_zext_to_i32(ptr 
%ptr) {
 }
 
 define i32 @atomic_load_flat_monotonic_i8_sext_to_i32(ptr %ptr) {
-; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX7:   ; %bb.0:
-; GFX7-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT:flat_load_ubyte v0, v[0:1] glc
-; GFX7-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:v_mov_b32_e32 v0, v2
-; GFX7-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX8:   ; %bb.0:
-; GFX8-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT:flat_load_ubyte v0, v[0:1] glc
-; GFX8-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:v_mov_b32_e32 v0, v2
-; GFX8-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX9:   ; %bb.0:
-; GFX9-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX9-NEXT:flat_load_ubyte v3, v[0:1] glc
-; GFX9-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:v_mov_b32_e32 v0, v2
-; GFX9-NEXT:s_setpc_b64 s[30:31]
+; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
+; GCN:   ; %bb.0:
+; GCN-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:flat_load_sbyte v0, v[0:1] glc
+; GCN-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:s_setpc_b64 s[30:31]
   %load = load atomic i8, ptr %ptr monotonic, align 1
   %ext = sext i8 %load to i32
   ret i32 %ext
@@ -71,32 +51,12 @@ define i16 @atomic_load_flat_monotonic_i8_zext_to_i16(ptr 
%ptr) {
 }
 
 define i16 @atomic_load_flat_monotonic_i8_sext_to_i16(ptr %ptr) {
-; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX7:   ; %bb.0:
-; GFX7-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT:flat_load_ubyte v0, v[0:1] glc
-; GFX7-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:v_mov_b32_e32 v0, v2
-; GFX7-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX8:   ; %bb.0:
-; GFX8-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT:flat_load_ubyte v0, v[0:1] glc
-; GFX8-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:v_mov_b32_e32 v0, v2
-; GFX8-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX9:   ; %bb.0:
-; GFX9-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX9-NEXT:flat_load_ubyte v3, v[0:1] glc
-; GFX9-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:v_mov_b32_e32 v0, v2
-; GFX9-NEXT:s_setpc_b64 s[30:31]
+; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
+; GCN:   ; %bb.0:
+; GCN-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:flat_load_sbyte v0, v[0:1] glc
+; GCN-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:s_setpc_b64 s[30:31]
   %load = load atomic i8, ptr %ptr monotonic, align 1
   %ext = sext i8 %load to i16
   ret

[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-globalisel

Author: Matt Arsenault (arsenm)


Changes

The sext_inreg (load) combine was not deleting the old load instruction,
and it would never be deleted if volatile or atomic.

---
Full diff: https://github.com/llvm/llvm-project/pull/111730.diff


5 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll (+18-78) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll (+9-30) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll (+9-27) 
- (modified) 
llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
 (-2) 


``diff
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp 
b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 14e94d48bf8362..535c827f6a8223 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1110,6 +1110,7 @@ void CombinerHelper::applySextInRegOfLoad(
   Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
  LoadDef->getPointerReg(), *NewMMO);
   MI.eraseFromParent();
+  LoadDef->eraseFromParent();
 }
 
 /// Return true if 'MI' is a load or a store that may be fold it's address
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
index 788fb04e842b4e..fc3bc09cf8e3e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
@@ -27,32 +27,12 @@ define i32 @atomic_load_flat_monotonic_i8_zext_to_i32(ptr 
%ptr) {
 }
 
 define i32 @atomic_load_flat_monotonic_i8_sext_to_i32(ptr %ptr) {
-; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX7:   ; %bb.0:
-; GFX7-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT:flat_load_ubyte v0, v[0:1] glc
-; GFX7-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:v_mov_b32_e32 v0, v2
-; GFX7-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX8:   ; %bb.0:
-; GFX8-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT:flat_load_ubyte v0, v[0:1] glc
-; GFX8-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:v_mov_b32_e32 v0, v2
-; GFX8-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX9:   ; %bb.0:
-; GFX9-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX9-NEXT:flat_load_ubyte v3, v[0:1] glc
-; GFX9-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:v_mov_b32_e32 v0, v2
-; GFX9-NEXT:s_setpc_b64 s[30:31]
+; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
+; GCN:   ; %bb.0:
+; GCN-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:flat_load_sbyte v0, v[0:1] glc
+; GCN-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:s_setpc_b64 s[30:31]
   %load = load atomic i8, ptr %ptr monotonic, align 1
   %ext = sext i8 %load to i32
   ret i32 %ext
@@ -71,32 +51,12 @@ define i16 @atomic_load_flat_monotonic_i8_zext_to_i16(ptr 
%ptr) {
 }
 
 define i16 @atomic_load_flat_monotonic_i8_sext_to_i16(ptr %ptr) {
-; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX7:   ; %bb.0:
-; GFX7-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT:flat_load_ubyte v0, v[0:1] glc
-; GFX7-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:v_mov_b32_e32 v0, v2
-; GFX7-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX8:   ; %bb.0:
-; GFX8-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT:flat_load_ubyte v0, v[0:1] glc
-; GFX8-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:v_mov_b32_e32 v0, v2
-; GFX8-NEXT:s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX9:   ; %bb.0:
-; GFX9-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:flat_load_sbyte v2, v[0:1] glc
-; GFX9-NEXT:flat_load_ubyte v3, v[0:1] glc
-; GFX9-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:v_mov_b32_e32 v0, v2
-; GFX9-NEXT:s_setpc_b64 s[30:31]
+; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
+; GCN:   ; %bb.0:
+; GCN-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:flat_load_sbyte v0, v[0:1] glc
+; GCN-NEXT:s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:s_setpc_b64 s[30:31]
   %load = load atomic i8, ptr %ptr monotonic, align 1
   %ext = sext i8 %load to i16
   ret i16 %ext
@@ -126,32 +86,12 @@ define i32 @atomic_load_flat_monotonic_i16_zext_to_i32(ptr 
%ptr) {
 }
 
 define i32 @atomic_load_flat_monotonic_i16

[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/111730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/111730).
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#111730** (https://app.graphite.dev/github/pr/llvm/llvm-project/111730) 👈
* **#111721** (https://app.graphite.dev/github/pr/llvm/llvm-project/111721)
* **#111720** (https://app.graphite.dev/github/pr/llvm/llvm-project/111720)
* `main`

This stack of pull requests is managed by Graphite. Learn more about
stacking: https://stacking.dev/

Join @arsenm and the rest of your teammates on Graphite
(https://graphite.dev).

https://github.com/llvm/llvm-project/pull/111730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Handle atomic sextload and zextload (PR #111721)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/111721

Atomic loads are handled differently from the DAG, and have separate opcodes
and explicit control over the extensions, like ordinary loads. Add
new patterns for these.

There's room for cleanup and improvement. d16 cases aren't handled.

Fixes #111645

>From dd5d25a025df9b222f28026154f40aefe601c710 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 9 Oct 2024 19:35:29 +0400
Subject: [PATCH] AMDGPU: Handle atomic sextload and zextload

Atomic loads are handled differently from the DAG, and have separate opcodes
and explicit control over the extensions, like ordinary loads. Add
new patterns for these.

There's room for cleanup and improvement. d16 cases aren't handled.

Fixes #111645
---
 llvm/lib/Target/AMDGPU/AMDGPUGISel.td |   2 +
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td  |  21 +
 llvm/lib/Target/AMDGPU/BUFInstructions.td |   7 +
 llvm/lib/Target/AMDGPU/DSInstructions.td  |   7 +
 llvm/lib/Target/AMDGPU/FLATInstructions.td|  16 +
 llvm/lib/Target/AMDGPU/SIInstrInfo.td |  45 ++
 .../AMDGPU/GlobalISel/atomic_load_flat.ll | 331 
 .../AMDGPU/GlobalISel/atomic_load_global.ll   | 478 
 .../AMDGPU/GlobalISel/atomic_load_local_2.ll  | 509 ++
 9 files changed, 1416 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 278d3536add916..d348f489d95dd3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -207,6 +207,8 @@ def : GINodeEquiv {
 
 def : GINodeEquiv {
   bit CheckMMOIsAtomic = 1;
+  let IfSignExtend = G_SEXTLOAD;
+  let IfZeroExtend = G_ZEXTLOAD;
 }
 
 def : GINodeEquiv {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 09987a6504b9d0..7816ae911312c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -521,6 +521,27 @@ def atomic_load_64_#as : PatFrag<(ops node:$ptr), 
(atomic_load_64 node:$ptr)> {
   let IsAtomic = 1;
   let MemoryVT = i64;
 }
+
+def atomic_load_zext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_zext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i8;
+}
+
+def atomic_load_sext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_sext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i8;
+}
+
+def atomic_load_zext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_zext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i16;
+}
+
+def atomic_load_sext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_sext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i16;
+}
+
 } // End let AddressSpaces
 } // End foreach as
 
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td 
b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 6bdff9862e55ac..4ed0f9ade871f1 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -983,15 +983,22 @@ defm BUFFER_LOAD_LDS_U16 : MUBUF_Pseudo_Loads_LDSOpc <
 >;
 
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, 
atomic_load_zext_8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, 
atomic_load_16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, 
atomic_load_zext_16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, atomic_load_8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, 
atomic_load_16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, 
atomic_load_zext_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, 
atomic_load_zext_16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, 
atomic_load_sext_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, 
atomic_load_sext_16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, 
atomic_load_sext_16_global>;
 
 foreach vt = Reg32Types.types in {
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", vt, load_global>;
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td 
b/llvm/lib/Target/AMDGPU/DSInstructions.td
index e9283fde85a48d..7724821bbd7c36 100644
--- a/llvm/lib/

[llvm-branch-commits] [llvm] AMDGPU: Handle atomic sextload and zextload (PR #111721)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/111721).
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#111721** (https://app.graphite.dev/github/pr/llvm/llvm-project/111721) 👈
* **#111720** (https://app.graphite.dev/github/pr/llvm/llvm-project/111720)
* `main`

This stack of pull requests is managed by Graphite. Learn more about
stacking: https://stacking.dev/

Join @arsenm and the rest of your teammates on Graphite
(https://graphite.dev).

https://github.com/llvm/llvm-project/pull/111721
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Handle atomic sextload and zextload (PR #111721)

2024-10-09 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-globalisel

Author: Matt Arsenault (arsenm)


Changes

Atomic loads are handled differently from the DAG, and have separate opcodes
and explicit control over the extensions, like ordinary loads. Add
new patterns for these.

There's room for cleanup and improvement. d16 cases aren't handled.

Fixes #111645

---

Patch is 62.48 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/111721.diff


9 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUGISel.td (+2) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (+21) 
- (modified) llvm/lib/Target/AMDGPU/BUFInstructions.td (+7) 
- (modified) llvm/lib/Target/AMDGPU/DSInstructions.td (+7) 
- (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+16) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+45) 
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll (+331) 
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll (+478) 
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll (+509) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td 
b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 278d3536add916..d348f489d95dd3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -207,6 +207,8 @@ def : GINodeEquiv {
 
 def : GINodeEquiv {
   bit CheckMMOIsAtomic = 1;
+  let IfSignExtend = G_SEXTLOAD;
+  let IfZeroExtend = G_ZEXTLOAD;
 }
 
 def : GINodeEquiv {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 09987a6504b9d0..7816ae911312c2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -521,6 +521,27 @@ def atomic_load_64_#as : PatFrag<(ops node:$ptr), 
(atomic_load_64 node:$ptr)> {
   let IsAtomic = 1;
   let MemoryVT = i64;
 }
+
+def atomic_load_zext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_zext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i8;
+}
+
+def atomic_load_sext_8_#as : PatFrag<(ops node:$ptr), (atomic_load_sext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i8;
+}
+
+def atomic_load_zext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_zext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i16;
+}
+
+def atomic_load_sext_16_#as : PatFrag<(ops node:$ptr), (atomic_load_sext 
node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i16;
+}
+
 } // End let AddressSpaces
 } // End foreach as
 
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td 
b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 6bdff9862e55ac..4ed0f9ade871f1 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -983,15 +983,22 @@ defm BUFFER_LOAD_LDS_U16 : MUBUF_Pseudo_Loads_LDSOpc <
 >;
 
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, atomic_load_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, 
atomic_load_zext_8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, 
atomic_load_16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, 
atomic_load_zext_16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, atomic_load_8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, 
atomic_load_16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i16, 
atomic_load_zext_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i16, 
atomic_load_zext_16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, extloadi8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_UBYTE", i32, zextloadi8_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, sextloadi8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, 
atomic_load_sext_8_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SBYTE", i32, 
atomic_load_sext_16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, extloadi16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_USHORT", i32, zextloadi16_global>;
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, sextloadi16_global>;
+defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_SSHORT", i32, 
atomic_load_sext_16_global>;
 
 foreach vt = Reg32Types.types in {
 defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORD", vt, load_global>;
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td 
b/llvm/lib/Target/AMDGPU/DSInstructions.td
index e9283fde85a48d..7724821bbd7c36 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -795,12 +795,19 @@ defm : DSReadPat_mc ;
 
 defm : DSReadPat_mc ;
 defm : DSReadPat_mc ;
+defm : DSReadPat_mc ;
+defm : DSReadPat_mc ;
+defm : DSReadPat_mc ;
+defm : DSReadPat_mc ;
 defm : DSReadPat_mc ;
 defm : DSReadPat_mc ;
+defm : DSReadPat_mc ;
+defm : DSReadPat_mc ;
 defm : DSReadPat_mc ;
 defm : DSReadPat_mc ;
 
 let OtherPredicates = [D16PreservesUnusedBits] in {

[llvm-branch-commits] [llvm] AMDGPU: Handle atomic sextload and zextload (PR #111721)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/111721
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Handle atomic sextload and zextload (PR #111721)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/111721
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread Thorsten Schütt via llvm-branch-commits

tschuett wrote:

The original test files look unaffected? https://reviews.llvm.org/D85966

https://github.com/llvm/llvm-project/pull/111730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [Flang] Move runtime library files to FortranRuntime. NFC (PR #110298)

2024-10-09 Thread Valentin Clement バレンタイン クレメン via llvm-branch-commits

clementval wrote:

> > How recent is your latest rebase? They are missing files in this PR.
> 
> I did not do a rebase since I started working on this PR, maybe 3 weeks ago. 
> With new files added/removed upstream and at the same time reviewers probably 
> asking to change the move location, keeping the PR stack current all the 
> time, and also ensure that every change is part of the correct PR, is a bit 
> of a hassle. I intend to start updating to `main` only after the directional 
> discussion of #110217 concluded (which may for instance decide to rename 
> `FortranRuntime` to `flang-rt`). I hope this is understandable.
> 
> In the past I also often forgot that I should only merge `main` into the 
> first patch of the sequence, then iteratively update all other in the 
> sequence. If you don't do that GitHub considers the upsteam commits as part 
> of the PR, and runs some bots which automatically add a lot of reviewers and 
> subscribers for each touched file in the upstream commits. Since then I am 
> careful to rebase patch sequences.

Ok, just wanted to make sure you are aware of new files. 

https://github.com/llvm/llvm-project/pull/110298
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Local: Handle noalias.addrspace in copyMetadataForLoad (PR #103939)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits


@@ -177,7 +177,7 @@ define i32 @test_load_cast_combine_noalias_addrspace(ptr 
%ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.

arsenm wrote:

```suggestion
; Ensure (cast (load (...))) -> (load (cast (...))) preserves noalias.addrspace.
```

https://github.com/llvm/llvm-project/pull/103939
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [MIR] Add missing noteNewVirtualRegister callbacks (PR #111634)

2024-10-09 Thread via llvm-branch-commits

Ayush9054 wrote:

Observation:

It has come to our attention that the changes made in 
llvm/lib/CodeGen/MIRParser/MIRParser.cpp are not related to the current pull 
request (PR). As these modifications do not pertain directly to the 
functionality addressed in this PR, they should be extracted and submitted as a 
separate non-functional change (NFC) patch.

Action Plan:

Create a Separate NFC Patch:

Isolate the changes made in MIRParser.cpp.
Ensure that the patch focuses solely on the improvements or refactoring present 
in that file without impacting the main objectives of the current PR.
Submit the Patch:

Once isolated, submit the NFC patch for review.
Ensure that the description clearly outlines the purpose of the changes made in 
MIRParser.cpp.
Next Steps:

Please prioritize the creation and submission of the NFC patch to maintain 
clarity and organization within the codebase.
Thank you for your attention to this mat

https://github.com/llvm/llvm-project/pull/111634
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add instruction flags when lowering ctor/dtor (PR #111652)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/111652
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DAG: Preserve more flags when expanding gep (PR #110815)

2024-10-09 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/110815
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [flang] [lld] [llvm] [Flang] LLVM_ENABLE_RUNTIMES=FortranRuntime (PR #110217)

2024-10-09 Thread Michael Kruse via llvm-branch-commits


@@ -171,145 +76,88 @@ set(sources
   unit-map.cpp
   unit.cpp
   utf.cpp
-  ${FORTRAN_MODULE_OBJECTS}
 )
 
-include(AddFlangOffloadRuntime)
-
-# List of files that are buildable for all devices.
-set(supported_files
-  ISO_Fortran_binding.cpp
-  allocatable.cpp
-  allocator-registry.cpp
-  array-constructor.cpp
-  assign.cpp
-  buffer.cpp
-  character.cpp
-  connection.cpp
-  copy.cpp
-  derived-api.cpp
-  derived.cpp
-  descriptor.cpp
-  descriptor-io.cpp
-  dot-product.cpp
-  edit-input.cpp
-  edit-output.cpp
-  environment.cpp
-  extrema.cpp
-  external-unit.cpp
-  file.cpp
-  findloc.cpp
-  format.cpp
-  inquiry.cpp
-  internal-unit.cpp
-  io-api.cpp
-  io-api-minimal.cpp
-  io-error.cpp
-  io-stmt.cpp
-  iostat.cpp
-  matmul-transpose.cpp
-  matmul.cpp
-  memory.cpp
-  misc-intrinsic.cpp
-  namelist.cpp
-  non-tbp-dio.cpp
-  numeric.cpp
-  pointer.cpp
-  product.cpp
-  pseudo-unit.cpp
-  ragged.cpp
-  stat.cpp
-  sum.cpp
-  support.cpp
-  terminator.cpp
-  tools.cpp
-  transformational.cpp
-  type-code.cpp
-  type-info.cpp
-  unit.cpp
-  utf.cpp
+set(public_headers "")
+file(GLOB_RECURSE public_headers
+  "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Runtime/*.h"
+  "${FLANGRUNTIME_SOURCE_DIR}/include/flang/Common/*.h"
   )
 
-enable_cuda_compilation(FortranRuntime "${supported_files}")
-enable_omp_offload_compilation("${supported_files}")

Meinersbur wrote:

I had expected that I moved those to `add_fortranruntime_library` but 
apparently I didn't. I re-added it with the latest push, including some build 
fixes since it doesn't even work with the trunk version. Even `flang-new` 
doesn't compile with the error:
```
ld.lld: error: undefined symbol: 
__cudaRegisterLinkedBinary_9067bd07_21_binary_to_decimal_cpp_c17d0b68
>>> referenced by tmpxft_2a72_-6_binary-to-decimal.cudafe1.cpp
>>>   binary-to-decimal.cpp.o:(__sticudaRegisterAll()) in 
>>> archive lib/libFortranDecimal.a
```
This is because libFortranDecimal is [compiled as CUDA 
source](https://github.com/llvm/llvm-project/blob/cc99bddb71738761bfe21490f3b6853da036cf97/flang/lib/Decimal/CMakeLists.txt#L58)
 (but not libFortranCommon) and needed for `flang-new`. libFortranDecimal for 
FortranRuntime and Flang should really be compiled independently of each other, 
as this PR does.

https://github.com/llvm/llvm-project/pull/110217
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [Flang] Move runtime library files to FortranRuntime. NFC (PR #110298)

2024-10-09 Thread Michael Kruse via llvm-branch-commits

Meinersbur wrote:

> How recent is your latest rebase? They are missing files in this PR.

I did not do a rebase since I started working on this PR, maybe 3 weeks ago. 
With new files added/removed upstream and at the same time reviewers probably 
asking to change the move location, keeping the PR stack current all the time, 
and also ensure that every change is part of the correct PR, is a bit of a 
hassle. I intend to start updating to `main` only after the directional 
discussion of #110217 concluded (which may for instance decide to rename 
`FortranRuntime` to `flang-rt`). I hope this is understandable.

In the past I also often forgot that I should only merge `main` into the first 
patch of the sequence, then iteratively update all other in the sequence. If 
you don't do that GitHub considers the upsteam commits as part of the PR, and 
runs some bots which automatically add a lot of reviewers and subscribers for 
each touched file in the upstream commits. Since then I am careful to rebase 
patch sequences.

https://github.com/llvm/llvm-project/pull/110298
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AtomicExpand: Copy metadata from atomicrmw to cmpxchg (PR #109409)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

ping 

https://github.com/llvm/llvm-project/pull/109409
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lldb] 358ad57 - Revert "[lldb][test] Enable TestDAP_runInTerminal on non-x86 architectures (#…"

2024-10-09 Thread via llvm-branch-commits

Author: David Spickett
Date: 2024-10-09T09:54:53+01:00
New Revision: 358ad5760066ef415012732c03fb28ec823f3205

URL: 
https://github.com/llvm/llvm-project/commit/358ad5760066ef415012732c03fb28ec823f3205
DIFF: 
https://github.com/llvm/llvm-project/commit/358ad5760066ef415012732c03fb28ec823f3205.diff

LOG: Revert "[lldb][test] Enable TestDAP_runInTerminal on non-x86 architectures 
(#…"

This reverts commit de4f2c976f9fa11173c71b2b070225c9be89ceef.

Added: 


Modified: 
lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py

Removed: 




diff  --git 
a/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py 
b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py
index 38a3c4d68eb280..ac96bcc1364a27 100644
--- a/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py
+++ b/lldb/test/API/tools/lldb-dap/runInTerminal/TestDAP_runInTerminal.py
@@ -44,6 +44,7 @@ def isTestSupported(self):
 return False
 
 @skipIfWindows
+@skipIf(archs=no_match(["x86_64"]))
 def test_runInTerminal(self):
 if not self.isTestSupported():
 return
@@ -89,6 +90,7 @@ def test_runInTerminal(self):
 env = self.dap_server.request_evaluate("foo")["body"]["result"]
 self.assertIn("bar", env)
 
+@skipIf(archs=no_match(["x86_64"]))
 def test_runInTerminalWithObjectEnv(self):
 if not self.isTestSupported():
 return
@@ -112,6 +114,7 @@ def test_runInTerminalWithObjectEnv(self):
 self.assertEqual("BAR", request_envs["FOO"])
 
 @skipIfWindows
+@skipIf(archs=no_match(["x86_64"]))
 def test_runInTerminalInvalidTarget(self):
 if not self.isTestSupported():
 return
@@ -130,6 +133,7 @@ def test_runInTerminalInvalidTarget(self):
 )
 
 @skipIfWindows
+@skipIf(archs=no_match(["x86_64"]))
 def test_missingArgInRunInTerminalLauncher(self):
 if not self.isTestSupported():
 return
@@ -144,6 +148,7 @@ def test_missingArgInRunInTerminalLauncher(self):
 )
 
 @skipIfWindows
+@skipIf(archs=no_match(["x86_64"]))
 def test_FakeAttachedRunInTerminalLauncherWithInvalidProgram(self):
 if not self.isTestSupported():
 return
@@ -170,6 +175,7 @@ def 
test_FakeAttachedRunInTerminalLauncherWithInvalidProgram(self):
 self.assertIn("No such file or directory", stderr)
 
 @skipIfWindows
+@skipIf(archs=no_match(["x86_64"]))
 def test_FakeAttachedRunInTerminalLauncherWithValidProgram(self):
 if not self.isTestSupported():
 return
@@ -196,6 +202,7 @@ def 
test_FakeAttachedRunInTerminalLauncherWithValidProgram(self):
 self.assertIn("foo", stdout)
 
 @skipIfWindows
+@skipIf(archs=no_match(["x86_64"]))
 def test_FakeAttachedRunInTerminalLauncherAndCheckEnvironment(self):
 if not self.isTestSupported():
 return
@@ -216,6 +223,7 @@ def 
test_FakeAttachedRunInTerminalLauncherAndCheckEnvironment(self):
 self.assertIn("FOO=BAR", stdout)
 
 @skipIfWindows
+@skipIf(archs=no_match(["x86_64"]))
 def test_NonAttachedRunInTerminalLauncher(self):
 if not self.isTestSupported():
 return



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Local: Handle noalias.addrspace in copyMetadataForLoad (PR #103939)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm closed 
https://github.com/llvm/llvm-project/pull/103939
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [Flang] Move runtime library files to FortranRuntime. NFC (PR #110298)

2024-10-09 Thread Michael Kruse via llvm-branch-commits

Meinersbur wrote:

> This is a gigantic change and I don't understand why it's being made. Why is 
> so much code moving out of flang/ ?

As explained in the summary, this is in preparation of #110217. 

#110217 changes the libFortranRuntime build to use the `LLVM_ENABLE_RUNTIMES` 
build system, like all the other LLVM runtimes do (compiler-rt, libc, libcxx, 
libcxxabi, offload, openmp, libunwind). `LLVM_ENABLE_RUNTIMES` was 
[introduced](https://github.com/llvm/llvm-project/commit/b688c5875d08c586f7b35b8f9da1493ebbf92b45)
 [to 
compile](https://discourse.llvm.org/t/rfc-a-vision-for-building-the-runtimes/56751/15)
 the runtimes for the target triple(s) instead of the host platform. This will 
be needed to support [cross-compilation with 
Flang](https://github.com/llvm/llvm-project/issues/102667). Flang is an LLVM 
project, it should follow LLVM conventions.

The  `LLVM_ENABLE_RUNTIMES` system assumes that each runtime library has a 
top-level directory in the repository root, so that's where I moved the files 
to. Moving the common files too was [a result of a discussion in the 
RFC](https://discourse.llvm.org/t/rfc-use-llvm-enable-runtimes-for-flangs-runtime/80826/8?u=meinersbur).

While it would be possible to only have a `FortranRuntime/CMakeLists.txt` and 
leave all the other files in `flang/`, I strongly believe this is a bad idea. 
It is the norm for LLVM runtimes to have separate sources. Coupling of runtime 
and compiler also [introduces 
problems](https://github.com/llvm/llvm-project/pull/110217#discussion_r1793179587)
 since both are built differently. It should be clear which files (also) belong 
to the runtime which has different build requirements. For instance, one must 
use `fortran::common::optional` instead of 
[`std::optional`](https://github.com/llvm/llvm-project/blob/1be64e5413cbe9cfa89539f70ad02ee1d8945ebe/flang/include/flang/Common/fast-int-set.h#L86)
 since otherwise the CUDA version fails building. With git being able to track 
renames, the renaming itself should be the least issues, much less than it 
would be to accumulate technical debt.

https://github.com/llvm/llvm-project/pull/110298
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add instruction flags when lowering ctor/dtor (PR #111652)

2024-10-09 Thread Joseph Huber via llvm-branch-commits

https://github.com/jhuber6 approved this pull request.


https://github.com/llvm/llvm-project/pull/111652
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Local: Handle noalias.addrspace in copyMetadataForLoad (PR #103939)

2024-10-09 Thread Fraser Cormack via llvm-branch-commits

https://github.com/frasercrmck approved this pull request.

LGTM other than nit

https://github.com/llvm/llvm-project/pull/103939
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Local: Handle noalias.addrspace in copyMetadataForLoad (PR #103939)

2024-10-09 Thread Fraser Cormack via llvm-branch-commits


@@ -177,7 +177,7 @@ define i32 @test_load_cast_combine_noalias_addrspace(ptr 
%ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.

frasercrmck wrote:

Should this comment reference `noalias.addrspace` rather than TBAA? 

https://github.com/llvm/llvm-project/pull/103939
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] DAG: Preserve more flags when expanding gep (PR #110815)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Oct 9, 5:42 AM EDT**: @arsenm started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/110815).


https://github.com/llvm/llvm-project/pull/110815
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Local: Handle noalias.addrspace in copyMetadataForLoad (PR #103939)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/103939
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Local: Handle noalias.addrspace in copyMetadataForLoad (PR #103939)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/103939

>From b655b4c3d3be5f8347ff4bc8fa37c1553f1fd980 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 14 Aug 2024 16:51:08 +0400
Subject: [PATCH 1/2] Local: Handle noalias.addrspace in copyMetadataForLoad

---
 llvm/lib/Transforms/Utils/Local.cpp| 1 +
 llvm/test/Transforms/InstCombine/loadstore-metadata.ll | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp 
b/llvm/lib/Transforms/Utils/Local.cpp
index 564e5f47024d40..49c5030357aacc 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3474,6 +3474,7 @@ void llvm::copyMetadataForLoad(LoadInst &Dest, const 
LoadInst &Source) {
 case LLVMContext::MD_mem_parallel_loop_access:
 case LLVMContext::MD_access_group:
 case LLVMContext::MD_noundef:
+case LLVMContext::MD_noalias_addrspace:
   // All of these directly apply.
   Dest.setMetadata(ID, N);
   break;
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll 
b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index 247a02f0bcc14a..dccbfbd13f73d0 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -177,7 +177,7 @@ define i32 @test_load_cast_combine_noalias_addrspace(ptr 
%ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
 ; CHECK-LABEL: @test_load_cast_combine_noalias_addrspace(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:[[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, 
!noalias.addrspace [[META10:![0-9]+]]
 ; CHECK-NEXT:ret i32 [[L1]]
 ;
 entry:

>From aa8358347867aa063a4003f3ef6c0c4ddcc046bc Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 9 Oct 2024 12:13:13 +0400
Subject: [PATCH 2/2] Fix test comment

---
 llvm/test/Transforms/InstCombine/loadstore-metadata.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll 
b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index dccbfbd13f73d0..cc2876a0a18b9d 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -174,7 +174,7 @@ define i32 @test_load_cast_combine_noundef(ptr %ptr) {
 }
 
 define i32 @test_load_cast_combine_noalias_addrspace(ptr %ptr) {
-; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves 
noalias.addrspace.
 ; CHECK-LABEL: @test_load_cast_combine_noalias_addrspace(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:[[L1:%.*]] = load i32, ptr [[PTR:%.*]], align 4, 
!noalias.addrspace [[META10:![0-9]+]]

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add instruction flags when lowering ctor/dtor (PR #111652)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack [on 
> Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/111652?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#111652** https://app.graphite.dev/github/pr/llvm/llvm-project/111652?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈
* **#111651** https://app.graphite.dev/github/pr/llvm/llvm-project/111651?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about 
stacking.


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/111652
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add instruction flags when lowering ctor/dtor (PR #111652)

2024-10-09 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/111652

These should be well behaved address computations.

>From 61f32fccd4cb7f8ef167ea5e6a7fb8e820a459b2 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Wed, 9 Oct 2024 13:11:16 +0400
Subject: [PATCH] AMDGPU: Add instruction flags when lowering ctor/dtor

These should be well behaved address computations.
---
 llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp  | 10 +++---
 .../CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll  |  4 ++--
 llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll|  4 ++--
 llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll   |  4 ++--
 4 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index ea11002bb6a5fa..a774ad53b5bede 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -112,9 +112,13 @@ static void createInitOrFiniCalls(Function &F, bool 
IsCtor) {
 Type *Int64Ty = IntegerType::getInt64Ty(C);
 auto *EndPtr = IRB.CreatePtrToInt(End, Int64Ty);
 auto *BeginPtr = IRB.CreatePtrToInt(Begin, Int64Ty);
-auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr);
-auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3));
-auto *Offset = IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1));
+auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr, "", /*HasNUW=*/true,
+   /*HasNSW=*/true);
+auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3), "",
+/*isExact=*/true);
+auto *Offset =
+IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1), "", /*HasNUW=*/true,
+  /*HasNSW=*/true);
 Start = IRB.CreateInBoundsGEP(
 PtrArrayTy, Begin,
 ArrayRef({ConstantInt::get(Int64Ty, 0), Offset}));
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll 
b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
index a87e07cb57e05e..968871af2d059a 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
@@ -64,8 +64,8 @@ define void @bar() addrspace(1) {
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
 ; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) 
@__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start 
to i64)), 3
-; CHECK-NEXT:[[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT:[[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr 
addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) 
@__fini_array_start to i64)), 3
+; CHECK-NEXT:[[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
 ; CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], 
ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:[[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], 
@__fini_array_start
 ; CHECK-NEXT:br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label 
[[WHILE_END:%.*]]
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll 
b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
index a423b320db559d..98497a64e3204c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
@@ -79,8 +79,8 @@ define internal void @bar() {
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) 
@__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start 
to i64)), 3
-; CHECK-NEXT:[[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT:[[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr 
addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) 
@__fini_array_start to i64)), 3
+; CHECK-NEXT:[[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
 ; CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], 
ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:[[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], 
@__fini_array_start
 ; CHECK-NEXT:br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label 
[[WHILE_END:%.*]]
diff --git a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll 
b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
index 309ecb17e79ed1..a137f31c7aeeca 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
@@ -71,8 +71,8 @@ define internal void @bar.5() {
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) 
@__fin

[llvm-branch-commits] [llvm] AMDGPU: Add instruction flags when lowering ctor/dtor (PR #111652)

2024-10-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes

These should be well behaved address computations.

---
Full diff: https://github.com/llvm/llvm-project/pull/111652.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp (+7-3) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll (+2-2) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index ea11002bb6a5fa..a774ad53b5bede 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -112,9 +112,13 @@ static void createInitOrFiniCalls(Function &F, bool 
IsCtor) {
 Type *Int64Ty = IntegerType::getInt64Ty(C);
 auto *EndPtr = IRB.CreatePtrToInt(End, Int64Ty);
 auto *BeginPtr = IRB.CreatePtrToInt(Begin, Int64Ty);
-auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr);
-auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3));
-auto *Offset = IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1));
+auto *ByteSize = IRB.CreateSub(EndPtr, BeginPtr, "", /*HasNUW=*/true,
+   /*HasNSW=*/true);
+auto *Size = IRB.CreateAShr(ByteSize, ConstantInt::get(Int64Ty, 3), "",
+/*isExact=*/true);
+auto *Offset =
+IRB.CreateSub(Size, ConstantInt::get(Int64Ty, 1), "", /*HasNUW=*/true,
+  /*HasNSW=*/true);
 Start = IRB.CreateInBoundsGEP(
 PtrArrayTy, Begin,
 ArrayRef({ConstantInt::get(Int64Ty, 0), Offset}));
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll 
b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
index a87e07cb57e05e..968871af2d059a 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll
@@ -64,8 +64,8 @@ define void @bar() addrspace(1) {
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
 ; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) 
@__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start 
to i64)), 3
-; CHECK-NEXT:[[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT:[[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr 
addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) 
@__fini_array_start to i64)), 3
+; CHECK-NEXT:[[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
 ; CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], 
ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:[[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], 
@__fini_array_start
 ; CHECK-NEXT:br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label 
[[WHILE_END:%.*]]
diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll 
b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
index a423b320db559d..98497a64e3204c 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll
@@ -79,8 +79,8 @@ define internal void @bar() {
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) 
@__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start 
to i64)), 3
-; CHECK-NEXT:[[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT:[[TMP0:%.*]] = ashr exact i64 sub nuw nsw (i64 ptrtoint (ptr 
addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) 
@__fini_array_start to i64)), 3
+; CHECK-NEXT:[[TMP1:%.*]] = sub nuw nsw i64 [[TMP0]], 1
 ; CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds [0 x ptr addrspace(1)], 
ptr addrspace(1) @__fini_array_start, i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:[[TMP3:%.*]] = icmp uge ptr addrspace(1) [[TMP2]], 
@__fini_array_start
 ; CHECK-NEXT:br i1 [[TMP3]], label [[WHILE_ENTRY:%.*]], label 
[[WHILE_END:%.*]]
diff --git a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll 
b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
index 309ecb17e79ed1..a137f31c7aeeca 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
@@ -71,8 +71,8 @@ define internal void @bar.5() {
 ; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[TMP0:%.*]] = ashr i64 sub (i64 ptrtoint (ptr addrspace(1) 
@__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start 
to i64)), 3
-; CHECK-NEXT:[[TMP1:%.*]] = sub i64 [[TMP0]], 1
+; CHECK-NEXT:[[TMP0:%.*]]

[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)

2024-10-09 Thread Pengcheng Wang via llvm-branch-commits

wangpc-pp wrote:

Ping.

And some updates on vector support: currently, `ExpandMemCmp` will only 
generate integer types (`i128`, `i256`) and so on. So, if we simply add `128`, 
`256`, `vlen` to `LoadSizes`, the LLVM IR will use i128/i256/... and then they 
are expanded to legal scalar types as we don't mark i128/i256/... legal when 
RVV exists.

There are two ways to fix this:
1. Make `ExpandMemCmp` generate vector types/operations.
2. Make i128/i256/... legal.

I think the first one is the right approach but I need some agreements on this.

https://github.com/llvm/llvm-project/pull/107548
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)

2024-10-09 Thread Pengcheng Wang via llvm-branch-commits

https://github.com/wangpc-pp updated 
https://github.com/llvm/llvm-project/pull/107548

>From f21cfcfc90330ee3856746b6315a81a00313b0e0 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng 
Date: Fri, 6 Sep 2024 17:20:51 +0800
Subject: [PATCH 1/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.6-beta.1
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  15 +
 .../Target/RISCV/RISCVTargetTransformInfo.h   |   3 +
 llvm/test/CodeGen/RISCV/memcmp.ll | 932 ++
 3 files changed, 950 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/memcmp.ll

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp 
b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index e809e15eacf696..ad532aadc83266 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2113,3 +2113,18 @@ bool RISCVTTIImpl::shouldConsiderAddressTypePromotion(
   }
   return Considerable;
 }
+
+RISCVTTIImpl::TTI::MemCmpExpansionOptions
+RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+  // FIXME: Vector haven't been tested.
+  Options.AllowOverlappingLoads =
+  (ST->enableUnalignedScalarMem() || ST->enableUnalignedScalarMem());
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  Options.NumLoadsPerBlock = Options.MaxNumLoads;
+  if (ST->is64Bit())
+Options.LoadSizes.push_back(8);
+  llvm::append_range(Options.LoadSizes, ArrayRef({4, 2, 1}));
+  Options.AllowedTailExpansions = {3, 5, 6};
+  return Options;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h 
b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 763b89bfec0a66..ee9bed09df97f3 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -404,6 +404,9 @@ class RISCVTTIImpl : public BasicTTIImplBase {
   shouldConsiderAddressTypePromotion(const Instruction &I,
  bool &AllowPromotionWithoutCommonHeader);
   std::optional getMinPageSize() const { return 4096; }
+
+  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+bool IsZeroCmp) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll 
b/llvm/test/CodeGen/RISCV/memcmp.ll
new file mode 100644
index 00..652cd02e2c750a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -0,0 +1,932 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -O2 | FileCheck %s 
--check-prefix=CHECK-ALIGNED-RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -O2 | FileCheck %s 
--check-prefix=CHECK-ALIGNED-RV64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 
-mattr=+unaligned-scalar-mem -O2 \
+; RUN:   | FileCheck %s --check-prefix=CHECK-UNALIGNED-RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 
-mattr=+unaligned-scalar-mem -O2 \
+; RUN:   | FileCheck %s --check-prefix=CHECK-UNALIGNED-RV64
+
+declare i32 @bcmp(i8*, i8*, iXLen) nounwind readonly
+declare i32 @memcmp(i8*, i8*, iXLen) nounwind readonly
+
+define i1 @bcmp_size_15(i8* %s1, i8* %s2) {
+; CHECK-ALIGNED-RV32-LABEL: bcmp_size_15:
+; CHECK-ALIGNED-RV32:   # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a5, 3(a0)
+; CHECK-ALIGNED-RV32-NEXT:slli a2, a2, 8
+; CHECK-ALIGNED-RV32-NEXT:or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:slli a4, a4, 16
+; CHECK-ALIGNED-RV32-NEXT:slli a5, a5, 24
+; CHECK-ALIGNED-RV32-NEXT:or a4, a5, a4
+; CHECK-ALIGNED-RV32-NEXT:or a2, a4, a2
+; CHECK-ALIGNED-RV32-NEXT:lbu a3, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:lbu a4, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:lbu a5, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT:lbu a6, 3(a1)
+; CHECK-ALIGNED-RV32-NEXT:slli a3, a3, 8
+; CHECK-ALIGNED-RV32-NEXT:or a3, a3, a4
+; CHECK-ALIGNED-RV32-NEXT:slli a5, a5, 16
+; CHECK-ALIGNED-RV32-NEXT:slli a6, a6, 24
+; CHECK-ALIGNED-RV32-NEXT:or a4, a6, a5
+; CHECK-ALIGNED-RV32-NEXT:or a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:xor a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:lbu a3, 5(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a4, 4(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a5, 6(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a6, 7(a0)
+; CHECK-ALIGNED-RV32-NEXT:slli a3, a3, 8
+; CHECK-ALIGNED-RV32-NEXT:or a3, a3, a4
+; CHECK-ALIGNED-RV32-NEXT:slli a5, a5, 16
+; CHECK-ALIGNED-RV32-NEXT:slli a6, a6, 24
+; CHECK-ALIGNED-RV32-NEXT:or a4, a6, a5
+; CHECK-ALIGNED-RV32-NEXT:or a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:lbu a4, 5(a1)
+; CHECK-ALIGNED-RV32-NEXT

[llvm-branch-commits] [llvm] [RISCV] Add initial support of memcmp expansion (PR #107548)

2024-10-09 Thread Pengcheng Wang via llvm-branch-commits

https://github.com/wangpc-pp updated 
https://github.com/llvm/llvm-project/pull/107548

>From f21cfcfc90330ee3856746b6315a81a00313b0e0 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng 
Date: Fri, 6 Sep 2024 17:20:51 +0800
Subject: [PATCH 1/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.6-beta.1
---
 .../Target/RISCV/RISCVTargetTransformInfo.cpp |  15 +
 .../Target/RISCV/RISCVTargetTransformInfo.h   |   3 +
 llvm/test/CodeGen/RISCV/memcmp.ll | 932 ++
 3 files changed, 950 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/memcmp.ll

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp 
b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index e809e15eacf696..ad532aadc83266 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2113,3 +2113,18 @@ bool RISCVTTIImpl::shouldConsiderAddressTypePromotion(
   }
   return Considerable;
 }
+
+RISCVTTIImpl::TTI::MemCmpExpansionOptions
+RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+  TTI::MemCmpExpansionOptions Options;
+  // FIXME: Vector haven't been tested.
+  Options.AllowOverlappingLoads =
+  (ST->enableUnalignedScalarMem() || ST->enableUnalignedScalarMem());
+  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+  Options.NumLoadsPerBlock = Options.MaxNumLoads;
+  if (ST->is64Bit())
+Options.LoadSizes.push_back(8);
+  llvm::append_range(Options.LoadSizes, ArrayRef({4, 2, 1}));
+  Options.AllowedTailExpansions = {3, 5, 6};
+  return Options;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h 
b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 763b89bfec0a66..ee9bed09df97f3 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -404,6 +404,9 @@ class RISCVTTIImpl : public BasicTTIImplBase {
   shouldConsiderAddressTypePromotion(const Instruction &I,
  bool &AllowPromotionWithoutCommonHeader);
   std::optional getMinPageSize() const { return 4096; }
+
+  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+bool IsZeroCmp) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll 
b/llvm/test/CodeGen/RISCV/memcmp.ll
new file mode 100644
index 00..652cd02e2c750a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/memcmp.ll
@@ -0,0 +1,932 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -O2 | FileCheck %s 
--check-prefix=CHECK-ALIGNED-RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -O2 | FileCheck %s 
--check-prefix=CHECK-ALIGNED-RV64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 
-mattr=+unaligned-scalar-mem -O2 \
+; RUN:   | FileCheck %s --check-prefix=CHECK-UNALIGNED-RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 
-mattr=+unaligned-scalar-mem -O2 \
+; RUN:   | FileCheck %s --check-prefix=CHECK-UNALIGNED-RV64
+
+declare i32 @bcmp(i8*, i8*, iXLen) nounwind readonly
+declare i32 @memcmp(i8*, i8*, iXLen) nounwind readonly
+
+define i1 @bcmp_size_15(i8* %s1, i8* %s2) {
+; CHECK-ALIGNED-RV32-LABEL: bcmp_size_15:
+; CHECK-ALIGNED-RV32:   # %bb.0: # %entry
+; CHECK-ALIGNED-RV32-NEXT:lbu a2, 1(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a3, 0(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a4, 2(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a5, 3(a0)
+; CHECK-ALIGNED-RV32-NEXT:slli a2, a2, 8
+; CHECK-ALIGNED-RV32-NEXT:or a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:slli a4, a4, 16
+; CHECK-ALIGNED-RV32-NEXT:slli a5, a5, 24
+; CHECK-ALIGNED-RV32-NEXT:or a4, a5, a4
+; CHECK-ALIGNED-RV32-NEXT:or a2, a4, a2
+; CHECK-ALIGNED-RV32-NEXT:lbu a3, 1(a1)
+; CHECK-ALIGNED-RV32-NEXT:lbu a4, 0(a1)
+; CHECK-ALIGNED-RV32-NEXT:lbu a5, 2(a1)
+; CHECK-ALIGNED-RV32-NEXT:lbu a6, 3(a1)
+; CHECK-ALIGNED-RV32-NEXT:slli a3, a3, 8
+; CHECK-ALIGNED-RV32-NEXT:or a3, a3, a4
+; CHECK-ALIGNED-RV32-NEXT:slli a5, a5, 16
+; CHECK-ALIGNED-RV32-NEXT:slli a6, a6, 24
+; CHECK-ALIGNED-RV32-NEXT:or a4, a6, a5
+; CHECK-ALIGNED-RV32-NEXT:or a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:xor a2, a2, a3
+; CHECK-ALIGNED-RV32-NEXT:lbu a3, 5(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a4, 4(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a5, 6(a0)
+; CHECK-ALIGNED-RV32-NEXT:lbu a6, 7(a0)
+; CHECK-ALIGNED-RV32-NEXT:slli a3, a3, 8
+; CHECK-ALIGNED-RV32-NEXT:or a3, a3, a4
+; CHECK-ALIGNED-RV32-NEXT:slli a5, a5, 16
+; CHECK-ALIGNED-RV32-NEXT:slli a6, a6, 24
+; CHECK-ALIGNED-RV32-NEXT:or a4, a6, a5
+; CHECK-ALIGNED-RV32-NEXT:or a3, a4, a3
+; CHECK-ALIGNED-RV32-NEXT:lbu a4, 5(a1)
+; CHECK-ALIGNED-RV32-NEXT

[llvm-branch-commits] [llvm] [AMDGPU] Serialize WWM_REG vreg flag (PR #110229)

2024-10-09 Thread Christudasan Devadasan via llvm-branch-commits

https://github.com/cdevadas approved this pull request.


https://github.com/llvm/llvm-project/pull/110229
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [Clang] Improve type traits recognition in `__has_builtin` (#111516) (PR #111660)

2024-10-09 Thread via llvm-branch-commits

https://github.com/cor3ntin created 
https://github.com/llvm/llvm-project/pull/111660

`__has_builtin` was relying on reversible identifiers and string matching to 
recognize builtin-type traits, leading to some newer type traits not being 
recognized.

Fixes #111477

>From e3ef65bd21443588acb142d44a2a5b4e5a055916 Mon Sep 17 00:00:00 2001
From: cor3ntin 
Date: Tue, 8 Oct 2024 23:03:32 +0200
Subject: [PATCH] [Clang] Improve type traits recognition in `__has_builtin`
 (#111516)

`__has_builtin` was relying on reversible identifiers and string
matching to recognize builtin-type traits, leading to some newer type
traits not being recognized.

Fixes #111477
---
 clang/include/clang/Basic/TokenKinds.def  |  5 ++-
 clang/lib/Lex/PPMacroExpansion.cpp| 52 ++-
 clang/test/Preprocessor/feature_tests.cpp |  6 ++-
 3 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/clang/include/clang/Basic/TokenKinds.def 
b/clang/include/clang/Basic/TokenKinds.def
index 8c54661e65cf46..0526fbf51bd91a 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -64,6 +64,10 @@
 #ifndef EXPRESSION_TRAIT
 #define EXPRESSION_TRAIT(I,E,K) KEYWORD(I,K)
 #endif
+#ifndef TRANSFORM_TYPE_TRAIT_DEF
+#define TRANSFORM_TYPE_TRAIT_DEF(K, Trait) KEYWORD(__##Trait, KEYCXX)
+#endif
+
 #ifndef ALIAS
 #define ALIAS(X,Y,Z)
 #endif
@@ -534,7 +538,6 @@ TYPE_TRAIT_1(__has_unique_object_representations,
 TYPE_TRAIT_2(__is_layout_compatible, IsLayoutCompatible, KEYCXX)
 TYPE_TRAIT_2(__is_pointer_interconvertible_base_of, 
IsPointerInterconvertibleBaseOf, KEYCXX)
 
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) KEYWORD(__##Trait, KEYCXX)
 #include "clang/Basic/TransformTypeTraits.def"
 
 // Clang-only C++ Type Traits
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp 
b/clang/lib/Lex/PPMacroExpansion.cpp
index 3913ff08c2eb55..fb88ec2bf603fe 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1602,6 +1602,34 @@ static bool isTargetVariantEnvironment(const TargetInfo 
&TI,
   return false;
 }
 
+static bool IsBuiltinTrait(Token &Tok) {
+
+#define TYPE_TRAIT_1(Spelling, Name, Key)  
\
+  case tok::kw_##Spelling: 
\
+return true;
+#define TYPE_TRAIT_2(Spelling, Name, Key)  
\
+  case tok::kw_##Spelling: 
\
+return true;
+#define TYPE_TRAIT_N(Spelling, Name, Key)  
\
+  case tok::kw_##Spelling: 
\
+return true;
+#define ARRAY_TYPE_TRAIT(Spelling, Name, Key)  
\
+  case tok::kw_##Spelling: 
\
+return true;
+#define EXPRESSION_TRAIT(Spelling, Name, Key)  
\
+  case tok::kw_##Spelling: 
\
+return true;
+#define TRANSFORM_TYPE_TRAIT_DEF(K, Spelling)  
\
+  case tok::kw___##Spelling:   
\
+return true;
+
+  switch (Tok.getKind()) {
+  default:
+return false;
+#include "clang/Basic/TokenKinds.def"
+  }
+}
+
 /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
 /// as a builtin macro, handle it and return the next token as 'Tok'.
 void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
@@ -1798,25 +1826,11 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
 getTargetInfo().getTargetOpts().FeatureMap);
   }
   return true;
-} else if (II->getTokenID() != tok::identifier ||
-   II->hasRevertedTokenIDToIdentifier()) {
-  // Treat all keywords that introduce a custom syntax of the form
-  //
-  //   '__some_keyword' '(' [...] ')'
-  //
-  // as being "builtin functions", even if the syntax isn't a valid
-  // function call (for example, because the builtin takes a type
-  // argument).
-  if (II->getName().starts_with("__builtin_") ||
-  II->getName().starts_with("__is_") ||
-  II->getName().starts_with("__has_"))
-return true;
-  return llvm::StringSwitch(II->getName())
-  .Case("__array_rank", true)
-  .Case("__array_extent", true)
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) .Case("__" #Trait, true)
-#include "clang/Basic/TransformTypeTraits.def"
-  .Default(false);
+} else if (IsBuiltinTrait(Tok)) {
+  return true;
+} else if (II->getTokenID() != tok::identifier &&
+   II->getName().starts_with("__builtin_")) {
+  return true;
 } else {
   return llvm::StringSwitch(II->getName())
   // Report builtin templates as being builtins

[llvm-branch-commits] [clang] [Clang] Improve type traits recognition in `__has_builtin` (#111516) (PR #111660)

2024-10-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: cor3ntin (cor3ntin)


Changes

`__has_builtin` was relying on reversible identifiers and string matching to 
recognize builtin-type traits, leading to some newer type traits not being 
recognized.

Fixes #111477

---
Full diff: https://github.com/llvm/llvm-project/pull/111660.diff


3 Files Affected:

- (modified) clang/include/clang/Basic/TokenKinds.def (+4-1) 
- (modified) clang/lib/Lex/PPMacroExpansion.cpp (+33-19) 
- (modified) clang/test/Preprocessor/feature_tests.cpp (+5-1) 


``diff
diff --git a/clang/include/clang/Basic/TokenKinds.def 
b/clang/include/clang/Basic/TokenKinds.def
index 8c54661e65cf46..0526fbf51bd91a 100644
--- a/clang/include/clang/Basic/TokenKinds.def
+++ b/clang/include/clang/Basic/TokenKinds.def
@@ -64,6 +64,10 @@
 #ifndef EXPRESSION_TRAIT
 #define EXPRESSION_TRAIT(I,E,K) KEYWORD(I,K)
 #endif
+#ifndef TRANSFORM_TYPE_TRAIT_DEF
+#define TRANSFORM_TYPE_TRAIT_DEF(K, Trait) KEYWORD(__##Trait, KEYCXX)
+#endif
+
 #ifndef ALIAS
 #define ALIAS(X,Y,Z)
 #endif
@@ -534,7 +538,6 @@ TYPE_TRAIT_1(__has_unique_object_representations,
 TYPE_TRAIT_2(__is_layout_compatible, IsLayoutCompatible, KEYCXX)
 TYPE_TRAIT_2(__is_pointer_interconvertible_base_of, 
IsPointerInterconvertibleBaseOf, KEYCXX)
 
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) KEYWORD(__##Trait, KEYCXX)
 #include "clang/Basic/TransformTypeTraits.def"
 
 // Clang-only C++ Type Traits
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp 
b/clang/lib/Lex/PPMacroExpansion.cpp
index 3913ff08c2eb55..fb88ec2bf603fe 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1602,6 +1602,34 @@ static bool isTargetVariantEnvironment(const TargetInfo 
&TI,
   return false;
 }
 
+static bool IsBuiltinTrait(Token &Tok) {
+
+#define TYPE_TRAIT_1(Spelling, Name, Key)  
\
+  case tok::kw_##Spelling: 
\
+return true;
+#define TYPE_TRAIT_2(Spelling, Name, Key)  
\
+  case tok::kw_##Spelling: 
\
+return true;
+#define TYPE_TRAIT_N(Spelling, Name, Key)  
\
+  case tok::kw_##Spelling: 
\
+return true;
+#define ARRAY_TYPE_TRAIT(Spelling, Name, Key)  
\
+  case tok::kw_##Spelling: 
\
+return true;
+#define EXPRESSION_TRAIT(Spelling, Name, Key)  
\
+  case tok::kw_##Spelling: 
\
+return true;
+#define TRANSFORM_TYPE_TRAIT_DEF(K, Spelling)  
\
+  case tok::kw___##Spelling:   
\
+return true;
+
+  switch (Tok.getKind()) {
+  default:
+return false;
+#include "clang/Basic/TokenKinds.def"
+  }
+}
+
 /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
 /// as a builtin macro, handle it and return the next token as 'Tok'.
 void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
@@ -1798,25 +1826,11 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
 getTargetInfo().getTargetOpts().FeatureMap);
   }
   return true;
-} else if (II->getTokenID() != tok::identifier ||
-   II->hasRevertedTokenIDToIdentifier()) {
-  // Treat all keywords that introduce a custom syntax of the form
-  //
-  //   '__some_keyword' '(' [...] ')'
-  //
-  // as being "builtin functions", even if the syntax isn't a valid
-  // function call (for example, because the builtin takes a type
-  // argument).
-  if (II->getName().starts_with("__builtin_") ||
-  II->getName().starts_with("__is_") ||
-  II->getName().starts_with("__has_"))
-return true;
-  return llvm::StringSwitch(II->getName())
-  .Case("__array_rank", true)
-  .Case("__array_extent", true)
-#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) .Case("__" #Trait, true)
-#include "clang/Basic/TransformTypeTraits.def"
-  .Default(false);
+} else if (IsBuiltinTrait(Tok)) {
+  return true;
+} else if (II->getTokenID() != tok::identifier &&
+   II->getName().starts_with("__builtin_")) {
+  return true;
 } else {
   return llvm::StringSwitch(II->getName())
   // Report builtin templates as being builtins.
diff --git a/clang/test/Preprocessor/feature_tests.cpp 
b/clang/test/Preprocessor/feature_tests.cpp
index 00421d74e6282a..13e2a9a261b667 100644
--- a/clang/test/Preprocessor/feature_tests.cpp
+++ b/clang/test/Preprocessor/feature_tests.cpp
@@ -31,7 +31,11 @@
 !__has_builtin(__underlying_type) || \
 !__has_builtin(__is_trivia

[llvm-branch-commits] [clang] [Clang] Improve type traits recognition in `__has_builtin` (#111516) (PR #111660)

2024-10-09 Thread via llvm-branch-commits

https://github.com/cor3ntin milestoned 
https://github.com/llvm/llvm-project/pull/111660
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [Clang] Improve type traits recognition in `__has_builtin` (#111516) (PR #111660)

2024-10-09 Thread via llvm-branch-commits

cor3ntin wrote:

Cherry pick as discussed in #111477

https://github.com/llvm/llvm-project/pull/111660
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64][PAC] Move emission of LR checks in tail calls to AsmPrinter (PR #110705)

2024-10-09 Thread Daniil Kovalev via llvm-branch-commits


@@ -107,6 +107,19 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const 
MachineInstr &MI) const {
   unsigned NumBytes = 0;
   const MCInstrDesc &Desc = MI.getDesc();
 
+  if (!MI.isBundle() && isTailCallReturnInst(MI)) {
+NumBytes = Desc.getSize() ? Desc.getSize() : 4;
+
+const auto *MFI = MF->getInfo();
+if (!MFI->shouldSignReturnAddress(MF))
+  return NumBytes;
+
+auto &STI = MF->getSubtarget();

kovdan01 wrote:

Nit
```suggestion
const auto &STI = MF->getSubtarget();
```

https://github.com/llvm/llvm-project/pull/110705
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64][PAC] Move emission of LR checks in tail calls to AsmPrinter (PR #110705)

2024-10-09 Thread Daniil Kovalev via llvm-branch-commits

https://github.com/kovdan01 edited 
https://github.com/llvm/llvm-project/pull/110705
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AArch64][PAC] Move emission of LR checks in tail calls to AsmPrinter (PR #110705)

2024-10-09 Thread Daniil Kovalev via llvm-branch-commits

https://github.com/kovdan01 commented:

@atrosinenko Could you please add a test for the case you've fixed in your 
latest commit edaae6a75879a38ffe8cd4e064aa9aca77126ef3? I might be missing 
something, but the fix looks untested.

https://github.com/llvm/llvm-project/pull/110705
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Flang][OpenMP] Derived type explicit allocatable member mapping (PR #111192)

2024-10-09 Thread Kareem Ergawy via llvm-branch-commits


@@ -82,104 +132,188 @@ class MapInfoFinalizationPass
 // perform an alloca and then store to it and retrieve the data from the 
new
 // alloca.
 if (mlir::isa(descriptor.getType())) {
-  // If we have already created a local allocation for this BoxType,
-  // we must be sure to re-use it so that we end up with the same
-  // allocations being utilised for the same descriptor across all map 
uses,
-  // this prevents runtime issues such as not appropriately releasing or
-  // deleting all mapped data.
-  auto find = localBoxAllocas.find(descriptor.getAsOpaquePointer());
-  if (find != localBoxAllocas.end()) {
-builder.create(loc, descriptor, find->second);
-descriptor = find->second;
-  } else {
-mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
-mlir::Block *allocaBlock = builder.getAllocaBlock();
-assert(allocaBlock && "No alloca block found for this top level op");
-builder.setInsertionPointToStart(allocaBlock);
-auto alloca = builder.create(loc, descriptor.getType());
-builder.restoreInsertionPoint(insPt);
-builder.create(loc, descriptor, alloca);
-localBoxAllocas[descriptor.getAsOpaquePointer()] = alloca;
-descriptor = alloca;
-  }
+  mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
+  mlir::Block *allocaBlock = builder.getAllocaBlock();
+  mlir::Location loc = boxMap->getLoc();
+  assert(allocaBlock && "No alloca block found for this top level op");
+  builder.setInsertionPointToStart(allocaBlock);
+  auto alloca = builder.create(loc, descriptor.getType());
+  builder.restoreInsertionPoint(insPt);
+  builder.create(loc, descriptor, alloca);
+  descriptor = alloca;
 }
 
+return descriptor;
+  }
+
+  /// Function that generates a FIR operation accessing the descriptor's
+  /// base address (BoxOffsetOp) and a MapInfoOp for it. The most
+  /// important thing to note is that we normally move the bounds from
+  /// the descriptor map onto the base address map.
+  mlir::omp::MapInfoOp getBaseAddrMap(mlir::Value descriptor,
+  mlir::OperandRange bounds,
+  int64_t mapType,
+  fir::FirOpBuilder &builder) {
+mlir::Location loc = descriptor.getLoc();
 mlir::Value baseAddrAddr = builder.create(
 loc, descriptor, fir::BoxFieldAttr::base_addr);
 
 // Member of the descriptor pointing at the allocated data
-mlir::Value baseAddr = builder.create(
+return builder.create(
 loc, baseAddrAddr.getType(), descriptor,
 mlir::TypeAttr::get(llvm::cast(
 fir::unwrapRefType(baseAddrAddr.getType()))
 .getElementType()),
 baseAddrAddr, /*members=*/mlir::SmallVector{},
-/*member_index=*/mlir::DenseIntElementsAttr{}, op.getBounds(),
-builder.getIntegerAttr(builder.getIntegerType(64, false),
-   op.getMapType().value()),
+/*membersIndex=*/mlir::ArrayAttr{}, bounds,
+builder.getIntegerAttr(builder.getIntegerType(64, false), mapType),
 builder.getAttr(
 mlir::omp::VariableCaptureKind::ByRef),
 /*name=*/builder.getStringAttr(""),
 /*partial_map=*/builder.getBoolAttr(false));
+  }
 
-// TODO: map the addendum segment of the descriptor, similarly to the
-// above base address/data pointer member.
+  /// This function adjusts the member indices vector to include a new
+  /// base address member. We take the position of the descriptor in
+  /// the member indices list, which is the index data that the base
+  /// addresses index will be based off of, as the base address is
+  /// a member of the descriptor. We must also alter other member's

ergawy wrote:

I think the comment was cut-off.

https://github.com/llvm/llvm-project/pull/92
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Flang][OpenMP] Derived type explicit allocatable member mapping (PR #111192)

2024-10-09 Thread Kareem Ergawy via llvm-branch-commits


@@ -58,21 +67,62 @@ class MapInfoFinalizationPass
/*corresponding local alloca=*/fir::AllocaOp>
   localBoxAllocas;
 
-  void genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
-   fir::FirOpBuilder &builder,
-   mlir::Operation *target) {
-mlir::Location loc = op.getLoc();
-mlir::Value descriptor = op.getVarPtr();
+  /// getMemberUserList gathers all users of a particular MapInfoOp that are
+  /// other MapInfoOp's and places them into the mapMemberUsers list, which
+  /// records the map that the current argument MapInfoOp "op" is part of
+  /// alongside the placement of "op" in the recorded users members list. The
+  /// intent of the generated list is to find all MapInfoOp's that may be
+  /// considered parents of the passed in "op" and in which it shows up in the
+  /// member list, alongside collecting the placement information of "op" in 
its
+  /// parents member list.
+  void
+  getMemberUserList(mlir::omp::MapInfoOp op,
+llvm::SmallVectorImpl &mapMemberUsers) 
{
+for (auto *users : op->getUsers())

ergawy wrote:

nit
```suggestion
for (auto *user : op->getUsers())
```

https://github.com/llvm/llvm-project/pull/92
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Flang][OpenMP] Derived type explicit allocatable member mapping (PR #111192)

2024-10-09 Thread Kareem Ergawy via llvm-branch-commits


@@ -58,21 +67,62 @@ class MapInfoFinalizationPass
/*corresponding local alloca=*/fir::AllocaOp>
   localBoxAllocas;
 
-  void genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
-   fir::FirOpBuilder &builder,
-   mlir::Operation *target) {
-mlir::Location loc = op.getLoc();
-mlir::Value descriptor = op.getVarPtr();
+  /// getMemberUserList gathers all users of a particular MapInfoOp that are
+  /// other MapInfoOp's and places them into the mapMemberUsers list, which
+  /// records the map that the current argument MapInfoOp "op" is part of
+  /// alongside the placement of "op" in the recorded users members list. The
+  /// intent of the generated list is to find all MapInfoOp's that may be
+  /// considered parents of the passed in "op" and in which it shows up in the
+  /// member list, alongside collecting the placement information of "op" in 
its
+  /// parents member list.
+  void
+  getMemberUserList(mlir::omp::MapInfoOp op,
+llvm::SmallVectorImpl &mapMemberUsers) 
{

ergawy wrote:

In what scenario will `mapMemberUsers` end up with more than one element?

As a test (just to learn more about the changes), I modified this function as 
follows and none of the tests failed:
```diff
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -81,8 +81,10 @@ class MapInfoFinalizationPass
 for (auto *users : op->getUsers())
   if (auto map = mlir::dyn_cast_if_present(users))
 for (auto [i, mapMember] : llvm::enumerate(map.getMembers()))
-  if (mapMember.getDefiningOp() == op)
+  if (mapMember.getDefiningOp() == op) {
 mapMemberUsers.push_back({map, i});
+break;
+  }
   }
 
   llvm::SmallVector
```

Also, `mapMemberUsers[0]` is the only element used below (in 
`genDescriptorMemberMaps`).

https://github.com/llvm/llvm-project/pull/92
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT] Allow builtin_unreachable to be at MaxSize address (PR #111771)

2024-10-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-bolt

Author: Amir Ayupov (aaupov)


Changes

Lift the restriction that builtin_unreachable has to be strictly within
`MaxSize` of a function.

Test Plan: added builtin_unreachable.s


---
Full diff: https://github.com/llvm/llvm-project/pull/111771.diff


2 Files Affected:

- (modified) bolt/lib/Core/BinaryFunction.cpp (+3-2) 
- (added) bolt/test/X86/builtin_unreachable.s (+33) 


``diff
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 27c8ccefedee10..8b1f441a3a01da 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1365,8 +1365,9 @@ Error BinaryFunction::disassemble() {
   if (containsAddress(TargetAddress)) {
 TargetSymbol = getOrCreateLocalLabel(TargetAddress);
   } else {
-if (TargetAddress == getAddress() + getSize() &&
-TargetAddress < getAddress() + getMaxSize() &&
+if (BC.isELF() && !BC.getBinaryDataAtAddress(TargetAddress) &&
+TargetAddress == getAddress() + getSize() &&
+TargetAddress <= getAddress() + getMaxSize() &&
 !(BC.isAArch64() &&
   BC.handleAArch64Veneer(TargetAddress, /*MatchOnly*/ true))) {
   // Result of __builtin_unreachable().
diff --git a/bolt/test/X86/builtin_unreachable.s 
b/bolt/test/X86/builtin_unreachable.s
new file mode 100644
index 00..ab533629d1a846
--- /dev/null
+++ b/bolt/test/X86/builtin_unreachable.s
@@ -0,0 +1,33 @@
+## Check that BOLT properly identifies a jump to builtin_unreachable
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
+# RUN: ld.lld -q -o %t %t.o
+# RUN: llvm-bolt %t -o %t.null -lite=0 -print-disasm | FileCheck %s
+# CHECK:  callq bar
+# CHECK-NEXT: nop
+
+.text
+.globl main
+.type main, @function
+main:
+  call foo
+  .size main, .-main
+
+.section .mytext.bar, "ax"
+.globl  bar
+.type  bar, @function
+bar:
+  ud2
+   .size   bar, .-bar
+
+.section .mytext.foo, "ax"
+.globl foo
+.type  foo, @function
+foo:
+.cfi_startproc
+  callq bar
+  jmp .Lunreachable
+  ret
+  .cfi_endproc
+   .size   foo, .-foo
+.Lunreachable:

``




https://github.com/llvm/llvm-project/pull/111771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT] Allow builtin_unreachable to be at MaxSize address (PR #111771)

2024-10-09 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov created 
https://github.com/llvm/llvm-project/pull/111771

Lift the restriction that builtin_unreachable has to be strictly within
`MaxSize` of a function.

Test Plan: added builtin_unreachable.s



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Allow builtin_unreachable to be at MaxSize address (PR #111771)

2024-10-09 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/111771

>From 67faa5f82d5b754f9a0c1f7ded516b5e25ce3f24 Mon Sep 17 00:00:00 2001
From: Amir Ayupov 
Date: Wed, 9 Oct 2024 16:56:07 -0700
Subject: [PATCH] fix indentation

Created using spr 1.3.4
---
 bolt/test/X86/builtin_unreachable.s | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bolt/test/X86/builtin_unreachable.s 
b/bolt/test/X86/builtin_unreachable.s
index ab533629d1a846..b66cf0058a50d1 100644
--- a/bolt/test/X86/builtin_unreachable.s
+++ b/bolt/test/X86/builtin_unreachable.s
@@ -11,14 +11,14 @@
 .type main, @function
 main:
   call foo
-  .size main, .-main
+.size main, .-main
 
 .section .mytext.bar, "ax"
 .globl  bar
 .type  bar, @function
 bar:
   ud2
-   .size   bar, .-bar
+.size  bar, .-bar
 
 .section .mytext.foo, "ax"
 .globl foo
@@ -29,5 +29,5 @@ foo:
   jmp .Lunreachable
   ret
   .cfi_endproc
-   .size   foo, .-foo
+.size  foo, .-foo
 .Lunreachable:

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [VectorCombine] Do not try to operate on OperandBundles. (#111635) (PR #111796)

2024-10-09 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/111796

Backport c136d3237a3c6230cfe1ab3f0f6790f903c54a27

Requested by: @davemgreen

>From b64f2902d1b01234f57888fcf78282bc7cddb293 Mon Sep 17 00:00:00 2001
From: David Green 
Date: Wed, 9 Oct 2024 16:20:03 +0100
Subject: [PATCH] [VectorCombine] Do not try to operate on OperandBundles.
 (#111635)

This bails out if we see an intrinsic with an operand bundle on it, to
make sure we don't process the bundles incorrectly.

Fixes #110382.

(cherry picked from commit c136d3237a3c6230cfe1ab3f0f6790f903c54a27)
---
 .../Transforms/Vectorize/VectorCombine.cpp| 59 ++-
 .../AArch64/shuffletoidentity.ll  | 48 +++
 2 files changed, 79 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 444598520c981a..679934d07e36d0 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1900,33 +1900,35 @@ bool VectorCombine::foldShuffleToIdentity(Instruction 
&I) {
 
 // We need each element to be the same type of value, and check that each
 // element has a single use.
-    if (all_of(drop_begin(Item), [Item](InstLane IL) {
-          Value *FrontV = Item.front().first->get();
-          if (!IL.first)
-            return true;
-          Value *V = IL.first->get();
-          if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse())
-            return false;
-          if (V->getValueID() != FrontV->getValueID())
-            return false;
-          if (auto *CI = dyn_cast<CmpInst>(V))
-            if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
-              return false;
-          if (auto *CI = dyn_cast<CastInst>(V))
-            if (CI->getSrcTy() != cast<CastInst>(FrontV)->getSrcTy())
-              return false;
-          if (auto *SI = dyn_cast<SelectInst>(V))
-            if (!isa<FixedVectorType>(SI->getOperand(0)->getType()) ||
-                SI->getOperand(0)->getType() !=
-                    cast<SelectInst>(FrontV)->getOperand(0)->getType())
-              return false;
-          if (isa<CallBase>(V) && !isa<IntrinsicInst>(V))
-            return false;
-          auto *II = dyn_cast<IntrinsicInst>(V);
-          return !II || (isa<IntrinsicInst>(FrontV) &&
-                         II->getIntrinsicID() ==
-                             cast<IntrinsicInst>(FrontV)->getIntrinsicID());
-        })) {
+    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
+      Value *FrontV = Item.front().first->get();
+      if (!IL.first)
+        return true;
+      Value *V = IL.first->get();
+      if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse())
+        return false;
+      if (V->getValueID() != FrontV->getValueID())
+        return false;
+      if (auto *CI = dyn_cast<CmpInst>(V))
+        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
+          return false;
+      if (auto *CI = dyn_cast<CastInst>(V))
+        if (CI->getSrcTy() != cast<CastInst>(FrontV)->getSrcTy())
+          return false;
+      if (auto *SI = dyn_cast<SelectInst>(V))
+        if (!isa<FixedVectorType>(SI->getOperand(0)->getType()) ||
+            SI->getOperand(0)->getType() !=
+                cast<SelectInst>(FrontV)->getOperand(0)->getType())
+          return false;
+      if (isa<CallBase>(V) && !isa<IntrinsicInst>(V))
+        return false;
+      auto *II = dyn_cast<IntrinsicInst>(V);
+      return !II || (isa<IntrinsicInst>(FrontV) &&
+                     II->getIntrinsicID() ==
+                         cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
+                     !II->hasOperandBundles());
+    };
+if (all_of(drop_begin(Item), CheckLaneIsEquivalentToFirst)) {
   // Check the operator is one that we support.
   if (isa(FrontU)) {
 //  We exclude div/rem in case they hit UB from poison lanes.
@@ -1954,7 +1956,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) 
{
 Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
 continue;
   } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
- II && isTriviallyVectorizable(II->getIntrinsicID())) {
+ II && isTriviallyVectorizable(II->getIntrinsicID()) &&
+ !II->hasOperandBundles()) {
 for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
   if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
 if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll 
b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index af04fb0ab4621b..66fe11369d88be 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -1066,4 +1066,52 @@ entry:
   ret <2 x float> %4
 }
 
+define <16 x i64> @operandbundles(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c) {
+; CHECK-LABEL: @operandbundles(
+; CHECK-NEXT:[[CALL:%.*]] = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> 
[[A:%.*]], <4 x i64> [[B:%.*]], <4 x i64> [[C:%.*]]) [ "jl_roots"(ptr 
addrspace(10) null, ptr addrs

[llvm-branch-commits] [llvm] release/19.x: [VectorCombine] Do not try to operate on OperandBundles. (#111635) (PR #111796)

2024-10-09 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: None (llvmbot)


Changes

Backport c136d3237a3c6230cfe1ab3f0f6790f903c54a27

Requested by: @davemgreen

---
Full diff: https://github.com/llvm/llvm-project/pull/111796.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+31-28) 
- (modified) llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll 
(+48) 


```diff
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 444598520c981a..679934d07e36d0 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1900,33 +1900,35 @@ bool VectorCombine::foldShuffleToIdentity(Instruction 
&I) {
 
 // We need each element to be the same type of value, and check that each
 // element has a single use.
-    if (all_of(drop_begin(Item), [Item](InstLane IL) {
-          Value *FrontV = Item.front().first->get();
-          if (!IL.first)
-            return true;
-          Value *V = IL.first->get();
-          if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse())
-            return false;
-          if (V->getValueID() != FrontV->getValueID())
-            return false;
-          if (auto *CI = dyn_cast<CmpInst>(V))
-            if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
-              return false;
-          if (auto *CI = dyn_cast<CastInst>(V))
-            if (CI->getSrcTy() != cast<CastInst>(FrontV)->getSrcTy())
-              return false;
-          if (auto *SI = dyn_cast<SelectInst>(V))
-            if (!isa<FixedVectorType>(SI->getOperand(0)->getType()) ||
-                SI->getOperand(0)->getType() !=
-                    cast<SelectInst>(FrontV)->getOperand(0)->getType())
-              return false;
-          if (isa<CallBase>(V) && !isa<IntrinsicInst>(V))
-            return false;
-          auto *II = dyn_cast<IntrinsicInst>(V);
-          return !II || (isa<IntrinsicInst>(FrontV) &&
-                         II->getIntrinsicID() ==
-                             cast<IntrinsicInst>(FrontV)->getIntrinsicID());
-        })) {
+    auto CheckLaneIsEquivalentToFirst = [Item](InstLane IL) {
+      Value *FrontV = Item.front().first->get();
+      if (!IL.first)
+        return true;
+      Value *V = IL.first->get();
+      if (auto *I = dyn_cast<Instruction>(V); I && !I->hasOneUse())
+        return false;
+      if (V->getValueID() != FrontV->getValueID())
+        return false;
+      if (auto *CI = dyn_cast<CmpInst>(V))
+        if (CI->getPredicate() != cast<CmpInst>(FrontV)->getPredicate())
+          return false;
+      if (auto *CI = dyn_cast<CastInst>(V))
+        if (CI->getSrcTy() != cast<CastInst>(FrontV)->getSrcTy())
+          return false;
+      if (auto *SI = dyn_cast<SelectInst>(V))
+        if (!isa<FixedVectorType>(SI->getOperand(0)->getType()) ||
+            SI->getOperand(0)->getType() !=
+                cast<SelectInst>(FrontV)->getOperand(0)->getType())
+          return false;
+      if (isa<CallBase>(V) && !isa<IntrinsicInst>(V))
+        return false;
+      auto *II = dyn_cast<IntrinsicInst>(V);
+      return !II || (isa<IntrinsicInst>(FrontV) &&
+                     II->getIntrinsicID() ==
+                         cast<IntrinsicInst>(FrontV)->getIntrinsicID() &&
+                     !II->hasOperandBundles());
+    };
+if (all_of(drop_begin(Item), CheckLaneIsEquivalentToFirst)) {
   // Check the operator is one that we support.
   if (isa(FrontU)) {
 //  We exclude div/rem in case they hit UB from poison lanes.
@@ -1954,7 +1956,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) 
{
 Worklist.push_back(generateInstLaneVectorFromOperand(Item, 2));
 continue;
   } else if (auto *II = dyn_cast<IntrinsicInst>(FrontU);
- II && isTriviallyVectorizable(II->getIntrinsicID())) {
+ II && isTriviallyVectorizable(II->getIntrinsicID()) &&
+ !II->hasOperandBundles()) {
 for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) {
   if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) {
 if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) {
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll 
b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index af04fb0ab4621b..66fe11369d88be 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -1066,4 +1066,52 @@ entry:
   ret <2 x float> %4
 }
 
+define <16 x i64> @operandbundles(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c) {
+; CHECK-LABEL: @operandbundles(
+; CHECK-NEXT:[[CALL:%.*]] = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> 
[[A:%.*]], <4 x i64> [[B:%.*]], <4 x i64> [[C:%.*]]) [ "jl_roots"(ptr 
addrspace(10) null, ptr addrspace(10) null) ]
+; CHECK-NEXT:[[SHUFFLEVECTOR:%.*]] = shufflevector <4 x i64> [[CALL]], <4 
x i64> poison, <16 x i32> 
+; CHECK-NEXT:[[SHUFFLEVECTOR1:%.*]] = shufflevector <16 x i64> 
[[SHUFFLEVECTOR]], <16 x i64> undef, <16 x i32> 
+; CHECK-NEXT:ret <16 x i64> [[SHUFFLEVECTOR1]]
+;
+  %call = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a

[llvm-branch-commits] [llvm] release/19.x: [VectorCombine] Do not try to operate on OperandBundles. (#111635) (PR #111796)

2024-10-09 Thread via llvm-branch-commits

llvmbot wrote:

@RKSimon What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/111796
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [VectorCombine] Do not try to operate on OperandBundles. (#111635) (PR #111796)

2024-10-09 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/111796
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Allow builtin_unreachable to be at MaxSize address (PR #111771)

2024-10-09 Thread Maksim Panchenko via llvm-branch-commits


@@ -1365,8 +1365,9 @@ Error BinaryFunction::disassemble() {
   if (containsAddress(TargetAddress)) {
 TargetSymbol = getOrCreateLocalLabel(TargetAddress);
   } else {
-if (TargetAddress == getAddress() + getSize() &&
-TargetAddress < getAddress() + getMaxSize() &&
+if (BC.isELF() && !BC.getBinaryDataAtAddress(TargetAddress) &&

maksfb wrote:

What happens when there's a data object at `TargetAddress`?

https://github.com/llvm/llvm-project/pull/111771
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement TTP P0522 pack matching for deduced function template calls. (PR #111457)

2024-10-09 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov updated 
https://github.com/llvm/llvm-project/pull/111457

>From 1975bae70df78c437d10a361a15aca27f218460e Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Sat, 5 Oct 2024 21:56:51 -0300
Subject: [PATCH] [clang] Implement TTP 'reversed' pack matching for deduced
 function template calls.

Clang previously missed implementing P0522 pack matching
for deduced function template calls.
---
 clang/docs/ReleaseNotes.rst  |  4 ++
 clang/include/clang/Sema/Overload.h  |  7 +-
 clang/include/clang/Sema/Sema.h  | 23 ---
 clang/lib/Sema/SemaLookup.cpp|  1 +
 clang/lib/Sema/SemaOverload.cpp  | 50 +--
 clang/lib/Sema/SemaTemplate.cpp  | 23 +++
 clang/lib/Sema/SemaTemplateDeduction.cpp | 76 --
 clang/test/SemaTemplate/cwg2398.cpp  | 81 
 8 files changed, 193 insertions(+), 72 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 153901fb3b1ed7..71967b1d27fbad 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -151,6 +151,10 @@ C++ Language Changes
 - The builtin type alias ``__builtin_common_type`` has been added to improve 
the
   performance of ``std::common_type``.
 
+- When matching a template to a template template parameter in the context of 
a deduced
+  function template call, clang now implements ``[temp.arg.template]p3.3``,
+  allowing a pack on the parameter to match a non-pack argument.
+
 C++2c Feature Support
 ^
 
diff --git a/clang/include/clang/Sema/Overload.h 
b/clang/include/clang/Sema/Overload.h
index c716a25bb673b8..8b7480636c36e7 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -925,6 +925,8 @@ class Sema;
 
 bool TookAddressOfOverload : 1;
 
+bool HasMatchedPackOnParmToNonPackOnArg : 1;
+
 /// True if the candidate was found using ADL.
 CallExpr::ADLCallKind IsADLCandidate : 1;
 
@@ -999,8 +1001,9 @@ class Sema;
 friend class OverloadCandidateSet;
 OverloadCandidate()
 : IsSurrogate(false), IgnoreObjectArgument(false),
-  TookAddressOfOverload(false), IsADLCandidate(CallExpr::NotADL),
-  RewriteKind(CRK_None) {}
+  TookAddressOfOverload(false),
+  HasMatchedPackOnParmToNonPackOnArg(false),
+  IsADLCandidate(CallExpr::NotADL), RewriteKind(CRK_None) {}
   };
 
   /// OverloadCandidateSet - A set of overload candidates, used in C++
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index d99a0c7af4b0ca..5f045860acb7b2 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10132,7 +10132,8 @@ class Sema final : public SemaBase {
   ADLCallKind IsADLCandidate = ADLCallKind::NotADL,
   ConversionSequenceList EarlyConversions = std::nullopt,
   OverloadCandidateParamOrder PO = {},
-  bool AggregateCandidateDeduction = false);
+  bool AggregateCandidateDeduction = false,
+  bool HasMatchedPackOnParmToNonPackOnArg = false);
 
   /// Add all of the function declarations in the given function set to
   /// the overload candidate set.
@@ -10167,7 +10168,8 @@ class Sema final : public SemaBase {
  bool SuppressUserConversions = false,
  bool PartialOverloading = false,
  ConversionSequenceList EarlyConversions = std::nullopt,
- OverloadCandidateParamOrder PO = {});
+ OverloadCandidateParamOrder PO = {},
+ bool HasMatchedPackOnParmToNonPackOnArg = false);
 
   /// Add a C++ member function template as a candidate to the candidate
   /// set, using template argument deduction to produce an appropriate member
@@ -10213,7 +10215,8 @@ class Sema final : public SemaBase {
   CXXConversionDecl *Conversion, DeclAccessPair FoundDecl,
   CXXRecordDecl *ActingContext, Expr *From, QualType ToType,
   OverloadCandidateSet &CandidateSet, bool AllowObjCConversionOnExplicit,
-  bool AllowExplicit, bool AllowResultConversion = true);
+  bool AllowExplicit, bool AllowResultConversion = true,
+  bool HasMatchedPackOnParmToNonPackOnArg = false);
 
   /// Adds a conversion function template specialization
   /// candidate to the overload set, using template argument deduction
@@ -11637,7 +11640,7 @@ class Sema final : public SemaBase {
 SourceLocation RAngleLoc, unsigned ArgumentPackIndex,
 SmallVectorImpl<TemplateArgument> &SugaredConverted,
 SmallVectorImpl<TemplateArgument> &CanonicalConverted,
-CheckTemplateArgumentKind CTAK,
+CheckTemplateArgumentKind CTAK, bool PartialOrdering,
 bool *MatchedPackOnParmToNonPackOnArg);
 
   /// Check that the given template arguments can be provided to
@@ -11720,7 +11723,8 @@ class Sema final : public SemaBase {
   //

[llvm-branch-commits] [clang] [clang] CWG2398: improve overload resolution backwards compat (PR #107350)

2024-10-09 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov updated 
https://github.com/llvm/llvm-project/pull/107350

>From aa016046df11993dff967eff970530c73ecf849e Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Thu, 5 Sep 2024 00:25:40 -0300
Subject: [PATCH] [clang] CWG2398: improve overload resolution backwards compat

With this change, we discriminate if the primary template and which partial
specializations would have participated in overload resolution prior to
P0522 changes.

We collect those in an initial set. If this set is not empty, or the
primary template would have matched, we proceed with this set as the
candidates for overload resolution.

Otherwise, we build a new overload set with everything else, and proceed
as usual.
---
 clang/docs/ReleaseNotes.rst  |  3 +-
 clang/include/clang/Sema/Sema.h  | 14 ---
 clang/include/clang/Sema/TemplateDeduction.h | 13 ++
 clang/lib/Sema/SemaLookup.cpp|  3 +-
 clang/lib/Sema/SemaTemplate.cpp  | 44 
 clang/lib/Sema/SemaTemplateDeduction.cpp | 43 +--
 clang/lib/Sema/SemaTemplateInstantiate.cpp   | 24 ++-
 clang/test/SemaTemplate/cwg2398.cpp  |  6 +--
 8 files changed, 97 insertions(+), 53 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7270e6898dbc7f..153901fb3b1ed7 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -204,7 +204,8 @@ Resolutions to C++ Defect Reports
   (`CWG2351: void{} `_).
 
 - Clang now has improved resolution to CWG2398, allowing class templates to 
have
-  default arguments deduced when partial ordering.
+  default arguments deduced when partial ordering, and better backwards 
compatibility
+  in overload resolution.
 
 - Clang now allows comparing unequal object pointers that have been cast to 
``void *``
   in constant expressions. These comparisons always worked in non-constant 
expressions.
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 043456438b6d03..d99a0c7af4b0ca 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11637,7 +11637,8 @@ class Sema final : public SemaBase {
 SourceLocation RAngleLoc, unsigned ArgumentPackIndex,
 SmallVectorImpl<TemplateArgument> &SugaredConverted,
 SmallVectorImpl<TemplateArgument> &CanonicalConverted,
-CheckTemplateArgumentKind CTAK);
+CheckTemplateArgumentKind CTAK,
+bool *MatchedPackOnParmToNonPackOnArg);
 
   /// Check that the given template arguments can be provided to
   /// the given template, converting the arguments along the way.
@@ -11684,7 +11685,8 @@ class Sema final : public SemaBase {
   SmallVectorImpl<TemplateArgument> &SugaredConverted,
   SmallVectorImpl<TemplateArgument> &CanonicalConverted,
   bool UpdateArgsWithConversions = true,
-  bool *ConstraintsNotSatisfied = nullptr, bool PartialOrderingTTP = 
false);
+  bool *ConstraintsNotSatisfied = nullptr, bool PartialOrderingTTP = false,
+  bool *MatchedPackOnParmToNonPackOnArg = nullptr);
 
   bool CheckTemplateTypeArgument(
   TemplateTypeParmDecl *Param, TemplateArgumentLoc &Arg,
@@ -11718,7 +11720,8 @@ class Sema final : public SemaBase {
   /// It returns true if an error occurred, and false otherwise.
   bool CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param,
  TemplateParameterList *Params,
- TemplateArgumentLoc &Arg, bool IsDeduced);
+ TemplateArgumentLoc &Arg, bool IsDeduced,
+ bool *MatchedPackOnParmToNonPackOnArg);
 
   void NoteTemplateLocation(const NamedDecl &Decl,
 std::optional ParamRange = {});
@@ -12419,7 +12422,7 @@ class Sema final : public SemaBase {
   bool isTemplateTemplateParameterAtLeastAsSpecializedAs(
   TemplateParameterList *PParam, TemplateDecl *PArg, TemplateDecl *AArg,
   const DefaultArguments &DefaultArgs, SourceLocation ArgLoc,
-  bool IsDeduced);
+  bool IsDeduced, bool *MatchedPackOnParmToNonPackOnArg);
 
   /// Mark which template parameters are used in a given expression.
   ///
@@ -13410,7 +13413,8 @@ class Sema final : public SemaBase {
   bool InstantiateClassTemplateSpecialization(
   SourceLocation PointOfInstantiation,
   ClassTemplateSpecializationDecl *ClassTemplateSpec,
-  TemplateSpecializationKind TSK, bool Complain = true);
+  TemplateSpecializationKind TSK, bool Complain = true,
+  bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false);
 
   /// Instantiates the definitions of all of the member
   /// of the given class, which is an instantiation of a class template
diff --git a/clang/include/clang/Sema/TemplateDeduction.h 
b/clang/include/clang/Sema/TemplateDeduction.h
index 28b0

[llvm-branch-commits] [clang] [clang] Implement TTP P0522 pack matching for deduced function template calls. (PR #111457)

2024-10-09 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov edited 
https://github.com/llvm/llvm-project/pull/111457
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement TTP P0522 pack matching for deduced function template calls. (PR #111457)

2024-10-09 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov edited 
https://github.com/llvm/llvm-project/pull/111457
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement TTP P0522 pack matching for deduced function template calls. (PR #111457)

2024-10-09 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov updated 
https://github.com/llvm/llvm-project/pull/111457

>From 0df57333178d360c441956cc2509f5eca4434c0e Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Sat, 5 Oct 2024 21:56:51 -0300
Subject: [PATCH] [clang] Implement TTP 'reversed' pack matching for deduced
 function template calls.

Clang previously missed implementing P0522 pack matching
for deduced function template calls.
---
 clang/docs/ReleaseNotes.rst  |  4 ++
 clang/include/clang/Sema/Overload.h  |  7 +-
 clang/include/clang/Sema/Sema.h  | 23 ---
 clang/lib/Sema/SemaLookup.cpp|  1 +
 clang/lib/Sema/SemaOverload.cpp  | 50 +--
 clang/lib/Sema/SemaTemplate.cpp  | 23 +++
 clang/lib/Sema/SemaTemplateDeduction.cpp | 76 --
 clang/test/SemaTemplate/cwg2398.cpp  | 81 
 8 files changed, 193 insertions(+), 72 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 153901fb3b1ed7..1324b28d84c3d0 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -151,6 +151,10 @@ C++ Language Changes
 - The builtin type alias ``__builtin_common_type`` has been added to improve 
the
   performance of ``std::common_type``.
 
+- When matching a template to a template template parameter in the context of 
a deduced
+  function template call, clang now implements P0522 pack rules,
+  allowing a non-pack parameter to match a pack argument.
+
 C++2c Feature Support
 ^
 
diff --git a/clang/include/clang/Sema/Overload.h 
b/clang/include/clang/Sema/Overload.h
index c716a25bb673b8..8b7480636c36e7 100644
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -925,6 +925,8 @@ class Sema;
 
 bool TookAddressOfOverload : 1;
 
+bool HasMatchedPackOnParmToNonPackOnArg : 1;
+
 /// True if the candidate was found using ADL.
 CallExpr::ADLCallKind IsADLCandidate : 1;
 
@@ -999,8 +1001,9 @@ class Sema;
 friend class OverloadCandidateSet;
 OverloadCandidate()
 : IsSurrogate(false), IgnoreObjectArgument(false),
-  TookAddressOfOverload(false), IsADLCandidate(CallExpr::NotADL),
-  RewriteKind(CRK_None) {}
+  TookAddressOfOverload(false),
+  HasMatchedPackOnParmToNonPackOnArg(false),
+  IsADLCandidate(CallExpr::NotADL), RewriteKind(CRK_None) {}
   };
 
   /// OverloadCandidateSet - A set of overload candidates, used in C++
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index d99a0c7af4b0ca..5f045860acb7b2 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -10132,7 +10132,8 @@ class Sema final : public SemaBase {
   ADLCallKind IsADLCandidate = ADLCallKind::NotADL,
   ConversionSequenceList EarlyConversions = std::nullopt,
   OverloadCandidateParamOrder PO = {},
-  bool AggregateCandidateDeduction = false);
+  bool AggregateCandidateDeduction = false,
+  bool HasMatchedPackOnParmToNonPackOnArg = false);
 
   /// Add all of the function declarations in the given function set to
   /// the overload candidate set.
@@ -10167,7 +10168,8 @@ class Sema final : public SemaBase {
  bool SuppressUserConversions = false,
  bool PartialOverloading = false,
  ConversionSequenceList EarlyConversions = std::nullopt,
- OverloadCandidateParamOrder PO = {});
+ OverloadCandidateParamOrder PO = {},
+ bool HasMatchedPackOnParmToNonPackOnArg = false);
 
   /// Add a C++ member function template as a candidate to the candidate
   /// set, using template argument deduction to produce an appropriate member
@@ -10213,7 +10215,8 @@ class Sema final : public SemaBase {
   CXXConversionDecl *Conversion, DeclAccessPair FoundDecl,
   CXXRecordDecl *ActingContext, Expr *From, QualType ToType,
   OverloadCandidateSet &CandidateSet, bool AllowObjCConversionOnExplicit,
-  bool AllowExplicit, bool AllowResultConversion = true);
+  bool AllowExplicit, bool AllowResultConversion = true,
+  bool HasMatchedPackOnParmToNonPackOnArg = false);
 
   /// Adds a conversion function template specialization
   /// candidate to the overload set, using template argument deduction
@@ -11637,7 +11640,7 @@ class Sema final : public SemaBase {
 SourceLocation RAngleLoc, unsigned ArgumentPackIndex,
 SmallVectorImpl<TemplateArgument> &SugaredConverted,
 SmallVectorImpl<TemplateArgument> &CanonicalConverted,
-CheckTemplateArgumentKind CTAK,
+CheckTemplateArgumentKind CTAK, bool PartialOrdering,
 bool *MatchedPackOnParmToNonPackOnArg);
 
   /// Check that the given template arguments can be provided to
@@ -11720,7 +11723,8 @@ class Sema final : public SemaBase {
   /// It returns true 

[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)

2024-10-09 Thread Thorsten Schütt via llvm-branch-commits


@@ -1110,6 +1110,7 @@ void CombinerHelper::applySextInRegOfLoad(
   Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
  LoadDef->getPointerReg(), *NewMMO);
   MI.eraseFromParent();
+  LoadDef->eraseFromParent();

tschuett wrote:

Please add a comment why the second `eraseFromParent` is needed.

https://github.com/llvm/llvm-project/pull/111730
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits