[llvm-branch-commits] [compiler-rt] [TySan] Fix struct access with different bases (PR #108385)

2024-09-23 Thread via llvm-branch-commits

https://github.com/gbMattN updated 
https://github.com/llvm/llvm-project/pull/108385

>From 4bde5de87155d131d320ce5db4c9bb69b42d487d Mon Sep 17 00:00:00 2001
From: Matthew Nagy 
Date: Thu, 12 Sep 2024 12:36:57 +
Subject: [PATCH] [TySan] Fix struct access with different bases

---
 compiler-rt/lib/tysan/tysan.cpp   |  4 +++
 .../tysan/struct-offset-different-base.cpp| 31 +++
 2 files changed, 35 insertions(+)
 create mode 100644 compiler-rt/test/tysan/struct-offset-different-base.cpp

diff --git a/compiler-rt/lib/tysan/tysan.cpp b/compiler-rt/lib/tysan/tysan.cpp
index f627851d049e6a..fa37ee5e77bd9a 100644
--- a/compiler-rt/lib/tysan/tysan.cpp
+++ b/compiler-rt/lib/tysan/tysan.cpp
@@ -128,6 +128,10 @@ static bool isAliasingLegalUp(tysan_type_descriptor *TDA,
   break;
   }
 
+  //You can't have negative offset, you must be partially inside the last 
type
+  if (TDA->Struct.Members[Idx].Offset > OffsetA)
+Idx -= 1;
+
   OffsetA -= TDA->Struct.Members[Idx].Offset;
   TDA = TDA->Struct.Members[Idx].Type;
 } else {
diff --git a/compiler-rt/test/tysan/struct-offset-different-base.cpp 
b/compiler-rt/test/tysan/struct-offset-different-base.cpp
new file mode 100644
index 00..716d21f844f96c
--- /dev/null
+++ b/compiler-rt/test/tysan/struct-offset-different-base.cpp
@@ -0,0 +1,31 @@
+// RUN: %clangxx_tysan -O0 %s -o %t && %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+#include 
+
+struct inner {
+   char buffer;
+   int i;
+};
+
+void init_inner(inner *iPtr) {
+   iPtr->i = 0;
+}
+
+struct outer {
+   inner foo;
+char buffer;
+};
+
+int main(void) {
+outer *l = new outer();
+
+init_inner(&l->foo);
+
+int access_offsets_with_different_base = l->foo.i;
+printf("%d\n", access_offsets_with_different_base);
+
+return 0;
+}
+
+// CHECK-NOT: ERROR: TypeSanitizer: type-aliasing-violation

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port machine trace metrics analysis to new pass manager. (PR #108507)

2024-09-23 Thread Christudasan Devadasan via llvm-branch-commits


@@ -102,19 +103,22 @@ class MachineTraceMetrics : public MachineFunctionPass {
   TargetSchedModel SchedModel;
 
 public:
+  friend class MachineTraceMetricsWrapperPass;
   friend class Ensemble;
   friend class Trace;
 
   class Ensemble;
 
-  static char ID;
+  // For legacy pass.
+  MachineTraceMetrics() {
+std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
+  }

cdevadas wrote:

It isn't possible to move the Ensembles pointer initialization out of the 
constructors. Certain tests crashed when the destructor invoked clear(), which 
tries to delete the Ensemble pointers (holding garbage values). The default 
constructor for these tests doesn't appropriately clear the object's members. 
The tests that crashed don't contain any function definitions, but only some 
global declarations. So the run() instances won't be invoked for clearing these 
pointers. Also, they should point to the dynamically allocated addresses 
(otherwise the initial null addresses) before the destructor is invoked.


https://github.com/llvm/llvm-project/pull/108507
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] release/19.x: [clang-tidy] Fix crash in modernize-use-ranges (#100427) (PR #101482)

2024-09-23 Thread via llvm-branch-commits

tupos wrote:

Hello,

I think there might be some problem with this PR. I updated to 19.1.0 today and 
started to see crashes with this callstack on random files. So far I have not 
been able to create a minimal repro that crashes reproducibly.

Strangely enough, it does not always crash on the file from which I obtained 
the crash. However, maybe you have an idea of what might be wrong.

```
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and 
include the crash backtrace.
 #0 0x005bde8b llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) 
/cpp/llvm-19.1.0/llvm/lib/Support/Unix/Signals.inc:727:3
 #1 0x005bbb94 llvm::sys::RunSignalHandlers() 
/cpp/llvm-19.1.0/llvm/lib/Support/Signals.cpp:105:20
 #2 0x005bbf4e SignalHandler(int) 
/cpp/llvm-19.1.0/llvm/lib/Support/Unix/Signals.inc:413:1
 #3 0x7f9844ae2910 __restore_rt (/lib64/libpthread.so.0+0x16910)
 #4 0x00586ee6 llvm::StringMapImpl::LookupBucketFor(llvm::StringRef, 
unsigned int) /cpp/llvm-19.1.0/llvm/lib/Support/StringMap.cpp:102:25
 #5 0x0264755f 
std::pair>,
 bool> 
llvm::StringMap,
 
llvm::MallocAllocator>::try_emplace_with_hash&>(llvm::StringRef,
 unsigned int, 
llvm::IntrusiveRefCntPtr&) 
/cpp/llvm-19.1.0/llvm/include/llvm/ADT/StringMap.h:376:40
 #6 0x0264755f 
std::pair>,
 bool> 
llvm::StringMap,
 
llvm::MallocAllocator>::try_emplace&>(llvm::StringRef,
 llvm::IntrusiveRefCntPtr&) 
/cpp/llvm-19.1.0/llvm/include/llvm/ADT/StringMap.h:369:33
 #7 0x0264755f clang::tidy::boost::UseRangesCheck::getReplacerMap() 
const::'lambda'(llvm::IntrusiveRefCntPtr,
 std::initializer_list, 
llvm::StringRef)::operator()(llvm::IntrusiveRefCntPtr,
 std::initializer_list, llvm::StringRef) const (.constprop.0) 
/cpp/llvm-19.1.0/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp:213:30
 #8 0x02647b1d 
llvm::RefCountedBase::Release() 
const /cpp/llvm-19.1.0/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h:102:20
 #9 0x02647b1d 
llvm::IntrusiveRefCntPtrInfo::release(clang::tidy::utils::UseRangesCheck::Replacer*)
 /cpp/llvm-19.1.0/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h:164:45
#10 0x02647b1d 
llvm::IntrusiveRefCntPtr::release()
 /cpp/llvm-19.1.0/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h:233:41
#11 0x02647b1d 
llvm::IntrusiveRefCntPtr::~IntrusiveRefCntPtr()
 /cpp/llvm-19.1.0/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h:196:34
#12 0x02647b1d clang::tidy::boost::UseRangesCheck::getReplacerMap() 
const::'lambda'(llvm::IntrusiveRefCntPtr,
 
std::initializer_list)::operator()(llvm::IntrusiveRefCntPtr,
 std::initializer_list) const (.constprop.0.isra.0) 
/cpp/llvm-19.1.0/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp:220:16
#13 0x02649032 
llvm::IntrusiveRefCntPtr::release()
 /cpp/llvm-19.1.0/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h:232:5
#14 0x02649032 
llvm::IntrusiveRefCntPtr::~IntrusiveRefCntPtr()
 /cpp/llvm-19.1.0/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h:196:34
#15 0x02649032 clang::tidy::boost::UseRangesCheck::getReplacerMap() 
const 
/cpp/llvm-19.1.0/clang-tools-extra/clang-tidy/boost/UseRangesCheck.cpp:234:13
#16 0x02c75a6c 
clang::tidy::utils::UseRangesCheck::registerMatchers(clang::ast_matchers::MatchFinder*)
 /cpp/llvm-19.1.0/clang-tools-extra/clang-tidy/utils/UseRangesCheck.cpp:112:43
#17 0x017a86c8 clang::clangd::ParsedAST::build(llvm::StringRef, 
clang::clangd::ParseInputs const&, std::unique_ptr>, 
llvm::ArrayRef, 
std::shared_ptr) 
/cpp/llvm-19.1.0/clang-tools-extra/clangd/ParsedAST.cpp:565:30
#18 0x01823c30 std::unique_ptr>::~unique_ptr() 
/usr/include/c++/13/bits/unique_ptr.h:403:12
#19 0x01823c30 clang::clangd::(anonymous 
namespace)::ASTWorker::generateDiagnostics(std::unique_ptr>, clang::clangd::ParseInputs, 
std::vector>) 
/cpp/llvm-19.1.0/clang-tools-extra/clangd/TUScheduler.cpp:1211:74
#20 0x0182465e std::unique_ptr>::~unique_ptr() 
/usr/include/c++/13/bits/unique_ptr.h:403:12
#21 0x0182465e clang::clangd::(anonymous 
namespace)::ASTWorker::updatePreamble(std::unique_ptr>, clang::clangd::ParseInputs, 
std::shared_ptr, 
std::vector>, 
clang::clangd::WantDiagnostics)::'lambda'()::operator()() (.part.0) 
/cpp/llvm-19.1.0/clang-tools-extra/clangd/TUScheduler.cpp:1144:24
#22 0x0181a273 clang::clangd::(anonymous 
namespace)::ASTWorker::runTask(llvm::StringRef, llvm::function_ref) 
/cpp/llvm-19.1.0/clang-tools-extra/clangd/TUScheduler.cpp:1325:1
#23 0x0181ca16 clang::clangd::(anonymous namespace)::ASTWorker::run() 
/cpp/llvm-19.1.0/clang-tools-extra/clangd/TUScheduler.cpp:1459:5
#24 0x019c5fc0 
llvm::detail::PunnedPointer::TrivialCallback*,
 llvm::detail::UniqueFunctionBase::NonTrivialCallbacks*>>::asInt() const 
/cpp/llvm-19.1.0/llvm/include/llvm/ADT/PointerIntPair.h:41:16
#25 0x019c5fc0 
llvm::detail::PunnedPointer::TrivialCallback*,
 llvm::detail::UniqueFunctionBase::NonTrivialCallbacks*>>::operator 
long() const /c

[llvm-branch-commits] [llvm] [X86][APX] Fix wrong encoding of promoted KMOV instructions due to missing NoCD8 (#109579) (PR #109635)

2024-09-23 Thread Phoebe Wang via llvm-branch-commits

https://github.com/phoebewang created 
https://github.com/llvm/llvm-project/pull/109635

Promoted KMOV* was encoded with CD8 incorrectly, see 
https://godbolt.org/z/cax513hG1

>From b403d2a05b548f24b46bab4c4ae014c9949f3c44 Mon Sep 17 00:00:00 2001
From: Phoebe Wang 
Date: Mon, 23 Sep 2024 09:41:43 +0800
Subject: [PATCH] [X86][APX] Fix wrong encoding of promoted KMOV instructions
 due to missing NoCD8 (#109579)

Promoted KMOV* was encoded with CD8 incorrectly, see
https://godbolt.org/z/cax513hG1
---
 llvm/lib/Target/X86/X86InstrAVX512.td  | 27 +++---
 llvm/test/MC/Disassembler/X86/apx/kmov.txt | 16 +
 llvm/test/MC/X86/apx/kmov-att.s| 14 ++-
 llvm/test/MC/X86/apx/kmov-intel.s  | 12 ++
 4 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td 
b/llvm/lib/Target/X86/X86InstrAVX512.td
index da690aea43f5c0..1bf201f2bb87e4 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2617,19 +2617,20 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 
0x67, SchedWriteFCmp>, E
 multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk,
   string OpcodeStr, RegisterClass KRC, ValueType vvt,
   X86MemOperand x86memop, string Suffix = ""> {
-  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
-  explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, 
ExplicitEVEX) in
-  def kk#Suffix : I,
-  Sched<[WriteMove]>;
-  def km#Suffix : I,
-  Sched<[WriteLoad]>;
-  def mk#Suffix : I,
-  Sched<[WriteStore]>;
+  let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, 
ExplicitEVEX) in {
+let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
+def kk#Suffix : I,
+Sched<[WriteMove]>;
+def km#Suffix : I,
+Sched<[WriteLoad]>, NoCD8;
+def mk#Suffix : I,
+Sched<[WriteStore]>, NoCD8;
+  }
 }
 
 multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk,
diff --git a/llvm/test/MC/Disassembler/X86/apx/kmov.txt 
b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
index 5d947ff39f2314..45fedbd0da587b 100644
--- a/llvm/test/MC/Disassembler/X86/apx/kmov.txt
+++ b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
@@ -17,6 +17,22 @@
 # INTEL: {evex} kmovq  k2, k1
 0x62,0xf1,0xfc,0x08,0x90,0xd1
 
+# ATT:   {evex} kmovb   -16(%rax), %k0
+# INTEL: {evex} kmovb   k0, byte ptr [rax - 16]
+0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovw   -16(%rax), %k0
+# INTEL: {evex} kmovw   k0, word ptr [rax - 16]
+0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovd   -16(%rax), %k0
+# INTEL: {evex} kmovd   k0, dword ptr [rax - 16]
+0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovq   -16(%rax), %k0
+# INTEL: {evex} kmovq   k0, qword ptr [rax - 16]
+0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0
+
 # ATT-NOT: {evex}
 # INTEL-NOT: {evex}
 
diff --git a/llvm/test/MC/X86/apx/kmov-att.s b/llvm/test/MC/X86/apx/kmov-att.s
index 949ef65be98d4c..5f59e0a505b235 100644
--- a/llvm/test/MC/X86/apx/kmov-att.s
+++ b/llvm/test/MC/X86/apx/kmov-att.s
@@ -1,7 +1,7 @@
 # RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
 # RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s 
--check-prefix=ERROR
 
-# ERROR-COUNT-20: error:
+# ERROR-COUNT-24: error:
 # ERROR-NOT: error:
 # CHECK: {evex}kmovb   %k1, %k2
 # CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1]
@@ -15,6 +15,18 @@
 # CHECK: {evex}kmovq   %k1, %k2
 # CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
  {evex}kmovq   %k1, %k2
+# CHECK: {evex} kmovb   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0]
+ {evex} kmovb   -0x10(%rax), %k0
+# CHECK: {evex} kmovw   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0]
+ {evex} kmovw   -0x10(%rax), %k0
+# CHECK: {evex} kmovd   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0]
+ {evex} kmovd   -0x10(%rax), %k0
+# CHECK: {evex} kmovq   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0]
+ {evex} kmovq   -0x10(%rax), %k0
 
 # CHECK-NOT: {evex}
 
diff --git a/llvm/test/MC/X86/apx/kmov-intel.s 
b/llvm/test/MC/X86/apx/kmov-intel.s
index 0cdbd310062eba..51cec67caf9a04 100644
--- a/llvm/test/MC/X86/apx/kmov-intel.s
+++ b/llvm/test/MC/X86/apx/kmov-intel.s
@@ -12,6 +12,18 @@
 # CHECK: {evex}kmovq   k2, k1
 # CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
  {evex}kmovq   k2, k1
+# CHECK: {evex} kmovb   k0, byte ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0]
+ {evex} kmovb   k0, byte ptr [rax - 0x10]
+# CHECK: {evex} kmovw   k0, word ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0]
+ {evex} kmovw   k0, word ptr [rax - 0x10]
+# CHECK: {evex} kmovd   k0, dword ptr [rax -

[llvm-branch-commits] [llvm] [X86][APX] Fix wrong encoding of promoted KMOV instructions due to missing NoCD8 (#109579) (PR #109635)

2024-09-23 Thread Phoebe Wang via llvm-branch-commits

https://github.com/phoebewang milestoned 
https://github.com/llvm/llvm-project/pull/109635
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [X86][APX] Fix wrong encoding of promoted KMOV instructions due to missing NoCD8 (#109579) (PR #109635)

2024-09-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mc

Author: Phoebe Wang (phoebewang)


Changes

Promoted KMOV* was encoded with CD8 incorrectly, see 
https://godbolt.org/z/cax513hG1

---
Full diff: https://github.com/llvm/llvm-project/pull/109635.diff


4 Files Affected:

- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+14-13) 
- (modified) llvm/test/MC/Disassembler/X86/apx/kmov.txt (+16) 
- (modified) llvm/test/MC/X86/apx/kmov-att.s (+13-1) 
- (modified) llvm/test/MC/X86/apx/kmov-intel.s (+12) 


``diff
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td 
b/llvm/lib/Target/X86/X86InstrAVX512.td
index da690aea43f5c0..1bf201f2bb87e4 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2617,19 +2617,20 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 
0x67, SchedWriteFCmp>, E
 multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk,
   string OpcodeStr, RegisterClass KRC, ValueType vvt,
   X86MemOperand x86memop, string Suffix = ""> {
-  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
-  explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, 
ExplicitEVEX) in
-  def kk#Suffix : I,
-  Sched<[WriteMove]>;
-  def km#Suffix : I,
-  Sched<[WriteLoad]>;
-  def mk#Suffix : I,
-  Sched<[WriteStore]>;
+  let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, 
ExplicitEVEX) in {
+let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
+def kk#Suffix : I,
+Sched<[WriteMove]>;
+def km#Suffix : I,
+Sched<[WriteLoad]>, NoCD8;
+def mk#Suffix : I,
+Sched<[WriteStore]>, NoCD8;
+  }
 }
 
 multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk,
diff --git a/llvm/test/MC/Disassembler/X86/apx/kmov.txt 
b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
index 5d947ff39f2314..45fedbd0da587b 100644
--- a/llvm/test/MC/Disassembler/X86/apx/kmov.txt
+++ b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
@@ -17,6 +17,22 @@
 # INTEL: {evex} kmovq  k2, k1
 0x62,0xf1,0xfc,0x08,0x90,0xd1
 
+# ATT:   {evex} kmovb   -16(%rax), %k0
+# INTEL: {evex} kmovb   k0, byte ptr [rax - 16]
+0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovw   -16(%rax), %k0
+# INTEL: {evex} kmovw   k0, word ptr [rax - 16]
+0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovd   -16(%rax), %k0
+# INTEL: {evex} kmovd   k0, dword ptr [rax - 16]
+0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovq   -16(%rax), %k0
+# INTEL: {evex} kmovq   k0, qword ptr [rax - 16]
+0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0
+
 # ATT-NOT: {evex}
 # INTEL-NOT: {evex}
 
diff --git a/llvm/test/MC/X86/apx/kmov-att.s b/llvm/test/MC/X86/apx/kmov-att.s
index 949ef65be98d4c..5f59e0a505b235 100644
--- a/llvm/test/MC/X86/apx/kmov-att.s
+++ b/llvm/test/MC/X86/apx/kmov-att.s
@@ -1,7 +1,7 @@
 # RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
 # RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s 
--check-prefix=ERROR
 
-# ERROR-COUNT-20: error:
+# ERROR-COUNT-24: error:
 # ERROR-NOT: error:
 # CHECK: {evex}kmovb   %k1, %k2
 # CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1]
@@ -15,6 +15,18 @@
 # CHECK: {evex}kmovq   %k1, %k2
 # CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
  {evex}kmovq   %k1, %k2
+# CHECK: {evex} kmovb   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0]
+ {evex} kmovb   -0x10(%rax), %k0
+# CHECK: {evex} kmovw   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0]
+ {evex} kmovw   -0x10(%rax), %k0
+# CHECK: {evex} kmovd   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0]
+ {evex} kmovd   -0x10(%rax), %k0
+# CHECK: {evex} kmovq   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0]
+ {evex} kmovq   -0x10(%rax), %k0
 
 # CHECK-NOT: {evex}
 
diff --git a/llvm/test/MC/X86/apx/kmov-intel.s 
b/llvm/test/MC/X86/apx/kmov-intel.s
index 0cdbd310062eba..51cec67caf9a04 100644
--- a/llvm/test/MC/X86/apx/kmov-intel.s
+++ b/llvm/test/MC/X86/apx/kmov-intel.s
@@ -12,6 +12,18 @@
 # CHECK: {evex}kmovq   k2, k1
 # CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
  {evex}kmovq   k2, k1
+# CHECK: {evex} kmovb   k0, byte ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0]
+ {evex} kmovb   k0, byte ptr [rax - 0x10]
+# CHECK: {evex} kmovw   k0, word ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0]
+ {evex} kmovw   k0, word ptr [rax - 0x10]
+# CHECK: {evex} kmovd   k0, dword ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0]
+ {evex} kmovd   k0, dword ptr [rax - 0x10]
+# CHECK: {evex} kmovq   k0, qword ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0]
+ {evex} kmovq   k0, qword ptr [rax - 0x10]
 
 # CHECK-NOT: {evex}
 


[llvm-branch-commits] [llvm] [X86][APX] Fix wrong encoding of promoted KMOV instructions due to missing NoCD8 (#109579) (PR #109635)

2024-09-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)


Changes

Promoted KMOV* was encoded with CD8 incorrectly, see 
https://godbolt.org/z/cax513hG1

---
Full diff: https://github.com/llvm/llvm-project/pull/109635.diff


4 Files Affected:

- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+14-13) 
- (modified) llvm/test/MC/Disassembler/X86/apx/kmov.txt (+16) 
- (modified) llvm/test/MC/X86/apx/kmov-att.s (+13-1) 
- (modified) llvm/test/MC/X86/apx/kmov-intel.s (+12) 


``diff
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td 
b/llvm/lib/Target/X86/X86InstrAVX512.td
index da690aea43f5c0..1bf201f2bb87e4 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2617,19 +2617,20 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 
0x67, SchedWriteFCmp>, E
 multiclass avx512_mask_mov opc_kk, bits<8> opc_km, bits<8> opc_mk,
   string OpcodeStr, RegisterClass KRC, ValueType vvt,
   X86MemOperand x86memop, string Suffix = ""> {
-  let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove],
-  explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, 
ExplicitEVEX) in
-  def kk#Suffix : I,
-  Sched<[WriteMove]>;
-  def km#Suffix : I,
-  Sched<[WriteLoad]>;
-  def mk#Suffix : I,
-  Sched<[WriteStore]>;
+  let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, 
ExplicitEVEX) in {
+let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
+def kk#Suffix : I,
+Sched<[WriteMove]>;
+def km#Suffix : I,
+Sched<[WriteLoad]>, NoCD8;
+def mk#Suffix : I,
+Sched<[WriteStore]>, NoCD8;
+  }
 }
 
 multiclass avx512_mask_mov_gpr opc_kr, bits<8> opc_rk,
diff --git a/llvm/test/MC/Disassembler/X86/apx/kmov.txt 
b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
index 5d947ff39f2314..45fedbd0da587b 100644
--- a/llvm/test/MC/Disassembler/X86/apx/kmov.txt
+++ b/llvm/test/MC/Disassembler/X86/apx/kmov.txt
@@ -17,6 +17,22 @@
 # INTEL: {evex} kmovq  k2, k1
 0x62,0xf1,0xfc,0x08,0x90,0xd1
 
+# ATT:   {evex} kmovb   -16(%rax), %k0
+# INTEL: {evex} kmovb   k0, byte ptr [rax - 16]
+0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovw   -16(%rax), %k0
+# INTEL: {evex} kmovw   k0, word ptr [rax - 16]
+0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovd   -16(%rax), %k0
+# INTEL: {evex} kmovd   k0, dword ptr [rax - 16]
+0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0
+
+# ATT:   {evex} kmovq   -16(%rax), %k0
+# INTEL: {evex} kmovq   k0, qword ptr [rax - 16]
+0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0
+
 # ATT-NOT: {evex}
 # INTEL-NOT: {evex}
 
diff --git a/llvm/test/MC/X86/apx/kmov-att.s b/llvm/test/MC/X86/apx/kmov-att.s
index 949ef65be98d4c..5f59e0a505b235 100644
--- a/llvm/test/MC/X86/apx/kmov-att.s
+++ b/llvm/test/MC/X86/apx/kmov-att.s
@@ -1,7 +1,7 @@
 # RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s
 # RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s 
--check-prefix=ERROR
 
-# ERROR-COUNT-20: error:
+# ERROR-COUNT-24: error:
 # ERROR-NOT: error:
 # CHECK: {evex}kmovb   %k1, %k2
 # CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1]
@@ -15,6 +15,18 @@
 # CHECK: {evex}kmovq   %k1, %k2
 # CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
  {evex}kmovq   %k1, %k2
+# CHECK: {evex} kmovb   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0]
+ {evex} kmovb   -0x10(%rax), %k0
+# CHECK: {evex} kmovw   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0]
+ {evex} kmovw   -0x10(%rax), %k0
+# CHECK: {evex} kmovd   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0]
+ {evex} kmovd   -0x10(%rax), %k0
+# CHECK: {evex} kmovq   -16(%rax), %k0
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0]
+ {evex} kmovq   -0x10(%rax), %k0
 
 # CHECK-NOT: {evex}
 
diff --git a/llvm/test/MC/X86/apx/kmov-intel.s 
b/llvm/test/MC/X86/apx/kmov-intel.s
index 0cdbd310062eba..51cec67caf9a04 100644
--- a/llvm/test/MC/X86/apx/kmov-intel.s
+++ b/llvm/test/MC/X86/apx/kmov-intel.s
@@ -12,6 +12,18 @@
 # CHECK: {evex}kmovq   k2, k1
 # CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1]
  {evex}kmovq   k2, k1
+# CHECK: {evex} kmovb   k0, byte ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0]
+ {evex} kmovb   k0, byte ptr [rax - 0x10]
+# CHECK: {evex} kmovw   k0, word ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0]
+ {evex} kmovw   k0, word ptr [rax - 0x10]
+# CHECK: {evex} kmovd   k0, dword ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0]
+ {evex} kmovd   k0, dword ptr [rax - 0x10]
+# CHECK: {evex} kmovq   k0, qword ptr [rax - 16]
+# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0]
+ {evex} kmovq   k0, qword ptr [rax - 0x10]
 
 # CHECK-NOT: {

[llvm-branch-commits] [llvm] AMDGPU: Expand flat atomics that may access private memory (PR #109407)

2024-09-23 Thread Stanislav Mekhanoshin via llvm-branch-commits

rampitec wrote:

> > Is it legal and defined behavior to target private memory with an atomic?
> 
> In the IR it would have to be, and this is the expected behavior in OpenMP 
> and C++. It's UB in OpenCL, and UB in CUDA/HIP for old style atomics, but 
> defined for new std::atomic style cases

Is there a plan that OpenCL and HIP FE will produce noalias metadata to avoid 
the expansion?

https://github.com/llvm/llvm-project/pull/109407
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Expand flat atomics that may access private memory (PR #109407)

2024-09-23 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> Is there a plan that OpenCL and HIP FE will produce noalias metadata to avoid 
> the expansion?

That's #102462 (although I think it needs refinement to avoid setting it in the 
std::atomic case)

https://github.com/llvm/llvm-project/pull/109407
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Expand flat atomics that may access private memory (PR #109407)

2024-09-23 Thread Stanislav Mekhanoshin via llvm-branch-commits

https://github.com/rampitec approved this pull request.

Thanks. Can this be landed after 
https://github.com/llvm/llvm-project/pull/102462?

https://github.com/llvm/llvm-project/pull/109407
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-09-23 Thread Ivan R. Ivanov via llvm-branch-commits

ivanradanov wrote:

I have added a comment explaining the limitation of not allowing CFG in 
workshare for now and an appropriate TODO message for that.

Regarding documenting the current supported statements in workshare - this 
specific commit will still result in the trivial omp.single lowering - for the 
subsequent PRs which actually enable various types of statements in the 
workshare construct, I will document what they enable. 
(https://github.com/llvm/llvm-project/pull/104748)

https://github.com/llvm/llvm-project/pull/101446
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-09-23 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 22:06:55 +0900
Subject: [PATCH 1/4] [flang] Lower omp.workshare to other omp constructs

Change to workshare loop wrapper op

Move single op declaration

Schedule pass properly

Correctly handle nested nested loop nests to be parallelized by workshare

Leave comments for shouldUseWorkshareLowering

Use copyprivate to scatter val from omp.single

TODO still need to implement copy function
TODO transitive check for usage outside of omp.single not implemented yet

Transitively check for users outside of single op

TODO need to implement copy func
TODO need to hoist allocas outside of single regions

Add tests

Hoist allocas

More tests

Emit body for copy func

Test the tmp storing logic

Clean up trivially dead ops

Only handle single-block regions for now

Fix tests for custom assembly for loop wrapper

Only run the lower workshare pass if openmp is enabled

Implement some missing functionality

Fix tests

Fix test

Iterate backwards to find all trivially dead ops

Add explanation comment for createCopyFun

Update test
---
 flang/include/flang/Optimizer/OpenMP/Passes.h |   5 +
 .../include/flang/Optimizer/OpenMP/Passes.td  |   5 +
 flang/include/flang/Tools/CLOptions.inc   |   6 +-
 flang/include/flang/Tools/CrossToolHelpers.h  |   1 +
 flang/lib/Frontend/FrontendActions.cpp|  10 +-
 flang/lib/Optimizer/OpenMP/CMakeLists.txt |   1 +
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++
 flang/test/Fir/basic-program.fir  |   1 +
 .../Transforms/OpenMP/lower-workshare.mlir| 189 
 .../Transforms/OpenMP/lower-workshare2.mlir   |  23 +
 .../Transforms/OpenMP/lower-workshare3.mlir   |  74 +++
 .../Transforms/OpenMP/lower-workshare4.mlir   |  59 +++
 .../Transforms/OpenMP/lower-workshare5.mlir   |  42 ++
 .../Transforms/OpenMP/lower-workshare6.mlir   |  51 ++
 flang/tools/bbc/bbc.cpp   |   5 +-
 flang/tools/tco/tco.cpp   |   1 +
 16 files changed, 915 insertions(+), 4 deletions(-)
 create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir

diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h 
b/flang/include/flang/Optimizer/OpenMP/Passes.h
index 403d79667bf448..feb395f1a12dbd 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.h
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.h
@@ -25,6 +25,11 @@ namespace flangomp {
 #define GEN_PASS_REGISTRATION
 #include "flang/Optimizer/OpenMP/Passes.h.inc"
 
+/// Impelements the logic specified in the 2.8.3  workshare Construct section 
of
+/// the OpenMP standard which specifies what statements or constructs shall be
+/// divided into units of work.
+bool shouldUseWorkshareLowering(mlir::Operation *op);
+
 } // namespace flangomp
 
 #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td 
b/flang/include/flang/Optimizer/OpenMP/Passes.td
index 395178e26a5762..041240cad12eb3 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.td
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> {
   ];
 }
 
+// Needs to be scheduled on Module as we create functions in it
+def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> {
+  let summary = "Lower workshare construct";
+}
+
 #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc 
b/flang/include/flang/Tools/CLOptions.inc
index 1881e23b00045a..bb00e079008a0b 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline(
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
 inline void createHLFIRToFIRPassPipeline(
-mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) 
{
+mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel 
= defaultOptLevel) {
   if (optLevel.isOptimizingForSpeed()) {
 addCanonicalizerPassWithoutRegionSimplification(pm);
 addNestedPassToAllTopLevelOperations(
@@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline(
   pm.addPass(hlfir::createLowerHLFIRIntrinsics());
   pm.addPass(hlfir::createBufferizeHLFIR());
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
+  if (enableOpenMP)
+pm.add

[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-09-23 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/104748

>From b710a580ab1732b3b41f0e3fb0684b45108d2c09 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Mon, 23 Sep 2024 16:04:07 +0900
Subject: [PATCH 1/3] clang-format

---
 flang/include/flang/Tools/CLOptions.inc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/include/flang/Tools/CLOptions.inc 
b/flang/include/flang/Tools/CLOptions.inc
index bb00e079008a0b..81ce69b4ec7e1b 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -336,8 +336,8 @@ inline void createDefaultFIROptimizerPassPipeline(
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
-inline void createHLFIRToFIRPassPipeline(
-mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel 
= defaultOptLevel) {
+inline void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
+bool enableOpenMP, llvm::OptimizationLevel optLevel = defaultOptLevel) {
   if (optLevel.isOptimizingForSpeed()) {
 addCanonicalizerPassWithoutRegionSimplification(pm);
 addNestedPassToAllTopLevelOperations(

>From 93ce40863c66da660e44f89fe8a984132077843c Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 17:33:52 +0900
Subject: [PATCH 2/3] Add workshare loop wrapper lowerings

Bufferize test

Bufferize test

Bufferize test

Add test for should use workshare lowering
---
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |   4 +-
 .../Transforms/OptimizedBufferization.cpp |  10 +-
 flang/test/HLFIR/bufferize-workshare.fir  |  58 
 .../OpenMP/should-use-workshare-lowering.mlir | 140 ++
 4 files changed, 208 insertions(+), 4 deletions(-)
 create mode 100644 flang/test/HLFIR/bufferize-workshare.fir
 create mode 100644 
flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 07794828fce267..1848dbe2c7a2c2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,6 +26,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/PatternMatch.h"
@@ -792,7 +793,8 @@ struct ElementalOpConversion
 // Generate a loop nest looping around the fir.elemental shape and clone
 // fir.elemental region inside the inner loop.
 hlfir::LoopNest loopNest =
-hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+   flangomp::shouldUseWorkshareLowering(elemental));
 auto insPt = builder.saveInsertionPoint();
 builder.setInsertionPointToStart(loopNest.body);
 auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 3a0a98dc594463..f014724861e333 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
 #include "flang/Optimizer/Transforms/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult 
ElementalAssignBufferization::matchAndRewrite(
   // Generate a loop nest looping around the hlfir.elemental shape and clone
   // hlfir.elemental region inside the inner loop
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
   builder.setInsertionPointToStart(loopNest.body);
   auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
 loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult 
BroadcastAssignBufferization::matchAndRewrite(
   llvm::SmallVector extents =
   hlfir::getIndexExtents(loc, builder, shape);
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true);
+  hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+ flangomp::shouldUseWorkshareLowering(assign));
   builder.setInsertionPointToStart(loopNest.b

[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)

2024-09-23 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/101446

>From e56dbd6a0625890fd9a3d6a62675e864ca94a8f5 Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 22:06:55 +0900
Subject: [PATCH 1/5] [flang] Lower omp.workshare to other omp constructs

Change to workshare loop wrapper op

Move single op declaration

Schedule pass properly

Correctly handle nested nested loop nests to be parallelized by workshare

Leave comments for shouldUseWorkshareLowering

Use copyprivate to scatter val from omp.single

TODO still need to implement copy function
TODO transitive check for usage outside of omp.single not implemented yet

Transitively check for users outside of single op

TODO need to implement copy func
TODO need to hoist allocas outside of single regions

Add tests

Hoist allocas

More tests

Emit body for copy func

Test the tmp storing logic

Clean up trivially dead ops

Only handle single-block regions for now

Fix tests for custom assembly for loop wrapper

Only run the lower workshare pass if openmp is enabled

Implement some missing functionality

Fix tests

Fix test

Iterate backwards to find all trivially dead ops

Add explanation comment for createCopyFun

Update test
---
 flang/include/flang/Optimizer/OpenMP/Passes.h |   5 +
 .../include/flang/Optimizer/OpenMP/Passes.td  |   5 +
 flang/include/flang/Tools/CLOptions.inc   |   6 +-
 flang/include/flang/Tools/CrossToolHelpers.h  |   1 +
 flang/lib/Frontend/FrontendActions.cpp|  10 +-
 flang/lib/Optimizer/OpenMP/CMakeLists.txt |   1 +
 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 446 ++
 flang/test/Fir/basic-program.fir  |   1 +
 .../Transforms/OpenMP/lower-workshare.mlir| 189 
 .../Transforms/OpenMP/lower-workshare2.mlir   |  23 +
 .../Transforms/OpenMP/lower-workshare3.mlir   |  74 +++
 .../Transforms/OpenMP/lower-workshare4.mlir   |  59 +++
 .../Transforms/OpenMP/lower-workshare5.mlir   |  42 ++
 .../Transforms/OpenMP/lower-workshare6.mlir   |  51 ++
 flang/tools/bbc/bbc.cpp   |   5 +-
 flang/tools/tco/tco.cpp   |   1 +
 16 files changed, 915 insertions(+), 4 deletions(-)
 create mode 100644 flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare2.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare3.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare4.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare5.mlir
 create mode 100644 flang/test/Transforms/OpenMP/lower-workshare6.mlir

diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.h 
b/flang/include/flang/Optimizer/OpenMP/Passes.h
index 403d79667bf448..feb395f1a12dbd 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.h
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.h
@@ -25,6 +25,11 @@ namespace flangomp {
 #define GEN_PASS_REGISTRATION
 #include "flang/Optimizer/OpenMP/Passes.h.inc"
 
+/// Implements the logic specified in the 2.8.3  workshare Construct section 
of
+/// the OpenMP standard which specifies what statements or constructs shall be
+/// divided into units of work.
+bool shouldUseWorkshareLowering(mlir::Operation *op);
+
 } // namespace flangomp
 
 #endif // FORTRAN_OPTIMIZER_OPENMP_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenMP/Passes.td 
b/flang/include/flang/Optimizer/OpenMP/Passes.td
index 395178e26a5762..041240cad12eb3 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.td
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -37,4 +37,9 @@ def FunctionFiltering : Pass<"omp-function-filtering"> {
   ];
 }
 
+// Needs to be scheduled on Module as we create functions in it
+def LowerWorkshare : Pass<"lower-workshare", "::mlir::ModuleOp"> {
+  let summary = "Lower workshare construct";
+}
+
 #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES
diff --git a/flang/include/flang/Tools/CLOptions.inc 
b/flang/include/flang/Tools/CLOptions.inc
index 1881e23b00045a..bb00e079008a0b 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -337,7 +337,7 @@ inline void createDefaultFIROptimizerPassPipeline(
 /// \param optLevel - optimization level used for creating FIR optimization
 ///   passes pipeline
 inline void createHLFIRToFIRPassPipeline(
-mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) 
{
+mlir::PassManager &pm, bool enableOpenMP, llvm::OptimizationLevel optLevel 
= defaultOptLevel) {
   if (optLevel.isOptimizingForSpeed()) {
 addCanonicalizerPassWithoutRegionSimplification(pm);
 addNestedPassToAllTopLevelOperations(
@@ -354,6 +354,8 @@ inline void createHLFIRToFIRPassPipeline(
   pm.addPass(hlfir::createLowerHLFIRIntrinsics());
   pm.addPass(hlfir::createBufferizeHLFIR());
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
+  if (enableOpenMP)
+pm.add

[llvm-branch-commits] [hwasan] Check order of mapping flags (PR #109621)

2024-09-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Vitaly Buka (vitalybuka)


Changes

Flags "-hwasan-mapping-offset" and
"-hwasan-mapping-offset-dynamic" are mutually
exclusive, use the last one.


---
Full diff: https://github.com/llvm/llvm-project/pull/109621.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (+8-4) 
- (added) llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll 
(+50) 


``diff
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index e386fa5d50b4d6..a70bfd121a2647 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1938,14 +1938,18 @@ void HWAddressSanitizer::ShadowMapping::init(Triple 
&TargetTriple,
 // Fuchsia is always PIE, which means that the beginning of the address
 // space is always available.
 SetFixed(0);
-  } else if (ClMappingOffset.getNumOccurrences() > 0) {
-SetFixed(ClMappingOffset);
   } else if (ClEnableKhwasan || InstrumentWithCalls) {
 SetFixed(0);
 WithFrameRecord = false;
-  } else if (ClMappingOffsetDynamic.getNumOccurrences() > 0) {
-Kind = ClMappingOffsetDynamic;
   }
 
   WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord);
+
+  // Apply the last of ClMappingOffset and ClMappingOffsetDynamic.
+  Kind = optOr(ClMappingOffsetDynamic, Kind);
+  if (ClMappingOffset.getNumOccurrences() > 0 &&
+  !(ClMappingOffsetDynamic.getNumOccurrences() > 0 &&
+ClMappingOffsetDynamic.getPosition() > ClMappingOffset.getPosition())) 
{
+SetFixed(ClMappingOffset);
+  }
 }
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll 
b/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll
new file mode 100644
index 00..5cd23f3ebe2b07
--- /dev/null
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 2
+
+; RUN: opt < %s -passes=hwasan -S | FileCheck %s
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=global -S | 
FileCheck %s --check-prefixes=GLOBAL
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=567 -S | FileCheck %s 
--check-prefixes=FIXED
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=567 
-hwasan-mapping-offset-dynamic=global -S | FileCheck %s 
--check-prefixes=FIXED-GLOBAL
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=global 
-hwasan-mapping-offset=567 -S | FileCheck %s --check-prefixes=GLOBAL-FIXED
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android"
+
+define i8 @test_load8(ptr %a) sanitize_hwaddress {
+; CHECK-LABEL: define i8 @test_load8
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:[[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr 
@__hwasan_shadow)
+; CHECK-NEXT:call void @llvm.hwasan.check.memaccess(ptr 
[[DOTHWASAN_SHADOW]], ptr [[A]], i32 0)
+; CHECK-NEXT:[[B:%.*]] = load i8, ptr [[A]], align 4
+; CHECK-NEXT:ret i8 [[B]]
+;
+; GLOBAL-LABEL: define i8 @test_load8
+; GLOBAL-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; GLOBAL-NEXT:[[TMP1:%.*]] = load ptr, ptr 
@__hwasan_shadow_memory_dynamic_address, align 8
+; GLOBAL-NEXT:call void @llvm.hwasan.check.memaccess(ptr [[TMP1]], ptr 
[[A]], i32 0)
+; GLOBAL-NEXT:[[B:%.*]] = load i8, ptr [[A]], align 4
+; GLOBAL-NEXT:ret i8 [[B]]
+;
+; FIXED-LABEL: define i8 @test_load8
+; FIXED-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; FIXED-NEXT:[[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr 
inttoptr (i64 567 to ptr))
+; FIXED-NEXT:call void @llvm.hwasan.check.memaccess(ptr 
[[DOTHWASAN_SHADOW]], ptr [[A]], i32 0)
+; FIXED-NEXT:[[B:%.*]] = load i8, ptr [[A]], align 4
+; FIXED-NEXT:ret i8 [[B]]
+;
+; FIXED-GLOBAL-LABEL: define i8 @test_load8
+; FIXED-GLOBAL-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; FIXED-GLOBAL-NEXT:[[TMP1:%.*]] = load ptr, ptr 
@__hwasan_shadow_memory_dynamic_address, align 8
+; FIXED-GLOBAL-NEXT:call void @llvm.hwasan.check.memaccess(ptr [[TMP1]], 
ptr [[A]], i32 0)
+; FIXED-GLOBAL-NEXT:[[B:%.*]] = load i8, ptr [[A]], align 4
+; FIXED-GLOBAL-NEXT:ret i8 [[B]]
+;
+; GLOBAL-FIXED-LABEL: define i8 @test_load8
+; GLOBAL-FIXED-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; GLOBAL-FIXED-NEXT:[[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr 
inttoptr (i64 567 to ptr))
+; GLOBAL-FIXED-NEXT:call void @llvm.hwasan.check.memaccess(ptr 
[[DOTHWASAN_SHADOW]], ptr [[A]], i32 0)
+; GLOBAL-FIXED-NEXT:[[B:%.*]] = load i8, ptr [[A]], align 4
+; GLOBAL-FIXED-NEXT:ret i8 [[B]]
+;
+  %b = load i8, ptr %a, align 4
+  ret i8 %b
+}

``




https://github.com/llvm/llvm-project/pull/109621
___
llvm-bra

[llvm-branch-commits] [hwasan] Check order of mapping flags (PR #109621)

2024-09-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Vitaly Buka (vitalybuka)


Changes

Flags "-hwasan-mapping-offset" and
"-hwasan-mapping-offset-dynamic" are mutually
exclusive, use the last one.


---
Full diff: https://github.com/llvm/llvm-project/pull/109621.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (+8-4) 
- (added) llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll 
(+50) 


``diff
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index e386fa5d50b4d6..a70bfd121a2647 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1938,14 +1938,18 @@ void HWAddressSanitizer::ShadowMapping::init(Triple 
&TargetTriple,
 // Fuchsia is always PIE, which means that the beginning of the address
 // space is always available.
 SetFixed(0);
-  } else if (ClMappingOffset.getNumOccurrences() > 0) {
-SetFixed(ClMappingOffset);
   } else if (ClEnableKhwasan || InstrumentWithCalls) {
 SetFixed(0);
 WithFrameRecord = false;
-  } else if (ClMappingOffsetDynamic.getNumOccurrences() > 0) {
-Kind = ClMappingOffsetDynamic;
   }
 
   WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord);
+
+  // Apply the last of ClMappingOffset and ClMappingOffsetDynamic.
+  Kind = optOr(ClMappingOffsetDynamic, Kind);
+  if (ClMappingOffset.getNumOccurrences() > 0 &&
+  !(ClMappingOffsetDynamic.getNumOccurrences() > 0 &&
+ClMappingOffsetDynamic.getPosition() > ClMappingOffset.getPosition())) 
{
+SetFixed(ClMappingOffset);
+  }
 }
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll 
b/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll
new file mode 100644
index 00..5cd23f3ebe2b07
--- /dev/null
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/mapping-override.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 2
+
+; RUN: opt < %s -passes=hwasan -S | FileCheck %s
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=global -S | 
FileCheck %s --check-prefixes=GLOBAL
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=567 -S | FileCheck %s 
--check-prefixes=FIXED
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=567 
-hwasan-mapping-offset-dynamic=global -S | FileCheck %s 
--check-prefixes=FIXED-GLOBAL
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=global 
-hwasan-mapping-offset=567 -S | FileCheck %s --check-prefixes=GLOBAL-FIXED
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android"
+
+define i8 @test_load8(ptr %a) sanitize_hwaddress {
+; CHECK-LABEL: define i8 @test_load8
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:[[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr 
@__hwasan_shadow)
+; CHECK-NEXT:call void @llvm.hwasan.check.memaccess(ptr 
[[DOTHWASAN_SHADOW]], ptr [[A]], i32 0)
+; CHECK-NEXT:[[B:%.*]] = load i8, ptr [[A]], align 4
+; CHECK-NEXT:ret i8 [[B]]
+;
+; GLOBAL-LABEL: define i8 @test_load8
+; GLOBAL-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; GLOBAL-NEXT:[[TMP1:%.*]] = load ptr, ptr 
@__hwasan_shadow_memory_dynamic_address, align 8
+; GLOBAL-NEXT:call void @llvm.hwasan.check.memaccess(ptr [[TMP1]], ptr 
[[A]], i32 0)
+; GLOBAL-NEXT:[[B:%.*]] = load i8, ptr [[A]], align 4
+; GLOBAL-NEXT:ret i8 [[B]]
+;
+; FIXED-LABEL: define i8 @test_load8
+; FIXED-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; FIXED-NEXT:[[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr 
inttoptr (i64 567 to ptr))
+; FIXED-NEXT:call void @llvm.hwasan.check.memaccess(ptr 
[[DOTHWASAN_SHADOW]], ptr [[A]], i32 0)
+; FIXED-NEXT:[[B:%.*]] = load i8, ptr [[A]], align 4
+; FIXED-NEXT:ret i8 [[B]]
+;
+; FIXED-GLOBAL-LABEL: define i8 @test_load8
+; FIXED-GLOBAL-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; FIXED-GLOBAL-NEXT:[[TMP1:%.*]] = load ptr, ptr 
@__hwasan_shadow_memory_dynamic_address, align 8
+; FIXED-GLOBAL-NEXT:call void @llvm.hwasan.check.memaccess(ptr [[TMP1]], 
ptr [[A]], i32 0)
+; FIXED-GLOBAL-NEXT:[[B:%.*]] = load i8, ptr [[A]], align 4
+; FIXED-GLOBAL-NEXT:ret i8 [[B]]
+;
+; GLOBAL-FIXED-LABEL: define i8 @test_load8
+; GLOBAL-FIXED-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; GLOBAL-FIXED-NEXT:[[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr 
inttoptr (i64 567 to ptr))
+; GLOBAL-FIXED-NEXT:call void @llvm.hwasan.check.memaccess(ptr 
[[DOTHWASAN_SHADOW]], ptr [[A]], i32 0)
+; GLOBAL-FIXED-NEXT:[[B:%.*]] = load i8, ptr [[A]], align 4
+; GLOBAL-FIXED-NEXT:ret i8 [[B]]
+;
+  %b = load i8, ptr %a, align 4
+  ret i8 %b
+}

``




https://github.com/llvm/llvm-project/pull/109621
___
ll

[llvm-branch-commits] [NFC][hwasan] Use `enum class` in `ShadowMapping` (PR #109617)

2024-09-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Vitaly Buka (vitalybuka)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/109617.diff


1 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 
(+30-43) 


``diff
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index be0ead40b573d8..2efb97c8759bc9 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -64,6 +64,7 @@
 #include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include 
 #include 
 #include 
 
@@ -83,8 +84,6 @@ const char kHwasanShadowMemoryDynamicAddress[] =
 static const size_t kNumberOfAccessSizes = 5;
 
 static const size_t kDefaultShadowScale = 4;
-static const uint64_t kDynamicShadowSentinel =
-std::numeric_limits::max();
 
 static const unsigned kShadowBaseAlignment = 32;
 
@@ -385,44 +384,44 @@ class HWAddressSanitizer {
   std::unique_ptr Rng;
 
   /// This struct defines the shadow mapping using the rule:
+  /// If `kFixed`, then
   ///   shadow = (mem >> Scale) + Offset.
-  /// If InGlobal is true, then
+  /// If `kGlobal`, then
+  ///   extern char* __hwasan_shadow_memory_dynamic_address;
+  ///   shadow = (mem >> Scale) + __hwasan_shadow_memory_dynamic_address
+  /// If `kIfunc`, then
   ///   extern char __hwasan_shadow[];
   ///   shadow = (mem >> Scale) + &__hwasan_shadow
-  /// If InTls is true, then
+  /// If `kTls`, then
   ///   extern char *__hwasan_tls;
   ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
   ///
   /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
   /// ring buffer for storing stack allocations on targets that support it.
   class ShadowMapping {
-uint8_t Scale;
+enum class OffsetKind {
+  kFixed = 0,
+  kGlobal,
+  kIfunc,
+  kTls,
+};
+OffsetKind Kind;
 uint64_t Offset;
-bool InGlobal;
-bool InTls;
+uint8_t Scale;
 bool WithFrameRecord;
 
+void SetFixed(uint64_t O) {
+  Kind = OffsetKind::kFixed;
+  Offset = O;
+}
+
   public:
 void init(Triple &TargetTriple, bool InstrumentWithCalls);
 Align getObjectAlignment() const { return Align(1ULL << Scale); }
-bool isInGlobal() const {
-  return !InGlobal && !InTls && Offset == kDynamicShadowSentinel;
-}
-bool isInifunc() const {
-  assert(!InGlobal || !InTls);
-  assert(!InGlobal || Offset == kDynamicShadowSentinel);
-  return InGlobal;
-}
-bool isInTls() const {
-  assert(!InTls || !InGlobal);
-  assert(!InTls || Offset == kDynamicShadowSentinel);
-  return InTls;
-}
-bool isFixed() const {
-  assert(Offset == kDynamicShadowSentinel || !InTls);
-  assert(Offset == kDynamicShadowSentinel || !InGlobal);
-  return Offset != kDynamicShadowSentinel;
-}
+bool isInGlobal() const { return Kind == OffsetKind::kGlobal; }
+bool isInifunc() const { return Kind == OffsetKind::kIfunc; }
+bool isInTls() const { return Kind == OffsetKind::kTls; }
+bool isFixed() const { return Kind == OffsetKind::kFixed; }
 uint8_t scale() const { return Scale; };
 uint64_t offset() const {
   assert(isFixed());
@@ -1930,34 +1929,22 @@ void HWAddressSanitizer::ShadowMapping::init(Triple 
&TargetTriple,
   if (TargetTriple.isOSFuchsia()) {
 // Fuchsia is always PIE, which means that the beginning of the address
 // space is always available.
-InGlobal = false;
-InTls = false;
-Offset = 0;
+SetFixed(0);
 WithFrameRecord = true;
   } else if (ClMappingOffset.getNumOccurrences() > 0) {
-InGlobal = false;
-InTls = false;
-Offset = ClMappingOffset;
+SetFixed(ClMappingOffset);
 WithFrameRecord = false;
   } else if (ClEnableKhwasan || InstrumentWithCalls) {
-InGlobal = false;
-InTls = false;
-Offset = 0;
+SetFixed(0);
 WithFrameRecord = false;
   } else if (ClWithIfunc) {
-InGlobal = true;
-InTls = false;
-Offset = kDynamicShadowSentinel;
+Kind = OffsetKind::kIfunc;
 WithFrameRecord = false;
   } else if (ClWithTls) {
-InGlobal = false;
-InTls = true;
-Offset = kDynamicShadowSentinel;
+Kind = OffsetKind::kTls;
 WithFrameRecord = true;
   } else {
-InGlobal = false;
-InTls = false;
-Offset = kDynamicShadowSentinel;
+Kind = OffsetKind::kGlobal;
 WithFrameRecord = false;
   }
 }

``




https://github.com/llvm/llvm-project/pull/109617
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][hwasan] Remove code duplication in ShadowMapping::init (PR #109618)

2024-09-23 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Vitaly Buka (vitalybuka)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/109618.diff


1 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (+6-5) 


``diff
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 2efb97c8759bc9..b82bb353793e7f 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1925,12 +1925,16 @@ void 
HWAddressSanitizer::instrumentPersonalityFunctions() {
 
 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
  bool InstrumentWithCalls) {
+  // Start with defaults.
   Scale = kDefaultShadowScale;
+  Kind = OffsetKind::kTls;
+  WithFrameRecord = true;
+
+  // Tune for the target.
   if (TargetTriple.isOSFuchsia()) {
 // Fuchsia is always PIE, which means that the beginning of the address
 // space is always available.
 SetFixed(0);
-WithFrameRecord = true;
   } else if (ClMappingOffset.getNumOccurrences() > 0) {
 SetFixed(ClMappingOffset);
 WithFrameRecord = false;
@@ -1940,10 +1944,7 @@ void HWAddressSanitizer::ShadowMapping::init(Triple 
&TargetTriple,
   } else if (ClWithIfunc) {
 Kind = OffsetKind::kIfunc;
 WithFrameRecord = false;
-  } else if (ClWithTls) {
-Kind = OffsetKind::kTls;
-WithFrameRecord = true;
-  } else {
+  } else if (!ClWithTls) {
 Kind = OffsetKind::kGlobal;
 WithFrameRecord = false;
   }

``




https://github.com/llvm/llvm-project/pull/109618
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [hwasan] Check order of mapping flags (PR #109621)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka created 
https://github.com/llvm/llvm-project/pull/109621

Flags "-hwasan-mapping-offset" and
"-hwasan-mapping-offset-dynamic" are mutually
exclusive, use the last one.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [WIP][flang] Introduce HLFIR lowerings to omp.workshare_loop_nest (PR #104748)

2024-09-23 Thread Ivan R. Ivanov via llvm-branch-commits

https://github.com/ivanradanov updated 
https://github.com/llvm/llvm-project/pull/104748

>From df6bd4282f21590b9d086608cd2cc136b18d54df Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Mon, 23 Sep 2024 16:25:55 +0900
Subject: [PATCH 1/3] Fix todo tests

---
 flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir | 2 +-
 flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir 
b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir
index 1c47d448f597d9..d10996167ae623 100644
--- a/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg-dom.mlir
@@ -1,4 +1,4 @@
-// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s 2>&1 | 
FileCheck %s
+// RUN: %not_todo_cmd fir-opt --lower-workshare --allow-unregistered-dialect 
%s 2>&1 | FileCheck %s
 
 // CHECK: not yet implemented: omp workshare with unstructured control flow
 
diff --git a/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir 
b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir
index bf6c196a05b4a3..46d2a8e8d48a8a 100644
--- a/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir
+++ b/flang/test/Transforms/OpenMP/lower-workshare-todo-cfg.mlir
@@ -1,4 +1,4 @@
-// RUN: fir-opt --lower-workshare --allow-unregistered-dialect %s 2>&1 | 
FileCheck %s
+// RUN: %not_todo_cmd fir-opt --lower-workshare --allow-unregistered-dialect 
%s 2>&1 | FileCheck %s
 
 // CHECK: not yet implemented: omp workshare with unstructured control flow
 

>From b08c4a372103e54be5f8436735d292a8042118ac Mon Sep 17 00:00:00 2001
From: Ivan Radanov Ivanov 
Date: Sun, 4 Aug 2024 17:33:52 +0900
Subject: [PATCH 2/3] Add workshare loop wrapper lowerings

Bufferize test

Bufferize test

Bufferize test

Add test for should use workshare lowering
---
 .../HLFIR/Transforms/BufferizeHLFIR.cpp   |   4 +-
 .../Transforms/OptimizedBufferization.cpp |  10 +-
 flang/test/HLFIR/bufferize-workshare.fir  |  58 
 .../OpenMP/should-use-workshare-lowering.mlir | 140 ++
 4 files changed, 208 insertions(+), 4 deletions(-)
 create mode 100644 flang/test/HLFIR/bufferize-workshare.fir
 create mode 100644 
flang/test/Transforms/OpenMP/should-use-workshare-lowering.mlir

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 07794828fce267..1848dbe2c7a2c2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -26,6 +26,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/PatternMatch.h"
@@ -792,7 +793,8 @@ struct ElementalOpConversion
 // Generate a loop nest looping around the fir.elemental shape and clone
 // fir.elemental region inside the inner loop.
 hlfir::LoopNest loopNest =
-hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+   flangomp::shouldUseWorkshareLowering(elemental));
 auto insPt = builder.saveInsertionPoint();
 builder.setInsertionPointToStart(loopNest.body);
 auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp 
b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 3a0a98dc594463..f014724861e333 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
 #include "flang/Optimizer/Transforms/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Dominance.h"
@@ -482,7 +483,8 @@ llvm::LogicalResult 
ElementalAssignBufferization::matchAndRewrite(
   // Generate a loop nest looping around the hlfir.elemental shape and clone
   // hlfir.elemental region inside the inner loop
   hlfir::LoopNest loopNest =
-  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered());
+  hlfir::genLoopNest(loc, builder, extents, !elemental.isOrdered(),
+ flangomp::shouldUseWorkshareLowering(elemental));
   builder.setInsertionPointToStart(loopNest.body);
   auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
 loopNest.oneBasedIndices);
@@ -553,7 +555,8 @@ llvm::LogicalResult 
BroadcastAssignBufferization:

[llvm-branch-commits] [hwasan] Replace "-hwasan-with-ifunc" and "-hwasan-with-tls" options (PR #109619)

2024-09-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Vitaly Buka (vitalybuka)


Changes

The relationship between "-hwasan-mapping-offset",
"-hwasan-with-ifunc", and "-hwasan-with-tls" can
be too hard to understand.

Now we will have "-hwasan-mapping-offset",
presence of which will imply fixed shadow.

"-hwasan-mapping-offset-dynamic" will select one
of the 3 available dynamic shadows.

As-is, "-hwasan-mapping-offset" has precedence over
"-hwasan-mapping-offset-dynamic". In follow-up
patches we need to use whichever one occurs
last.


---
Full diff: https://github.com/llvm/llvm-project/pull/109619.diff


6 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 
(+19-24) 
- (modified) llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll 
(+1-1) 
- (modified) llvm/test/Instrumentation/HWAddressSanitizer/RISCV/basic.ll (+2-2) 
- (modified) llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll (+2-2) 
- (modified) llvm/test/Instrumentation/HWAddressSanitizer/basic.ll (+2-2) 
- (modified) llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll (+4-4) 


``diff
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index b82bb353793e7f..a058357d7a4558 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -87,6 +87,15 @@ static const size_t kDefaultShadowScale = 4;
 
 static const unsigned kShadowBaseAlignment = 32;
 
+namespace {
+enum class OffsetKind {
+  kFixed = 0,
+  kGlobal,
+  kIfunc,
+  kTls,
+};
+}
+
 static cl::opt
 ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
  cl::desc("Prefix for memory access 
callbacks"),
@@ -170,19 +179,14 @@ static cl::opt
 static cl::opt
 ClMappingOffset("hwasan-mapping-offset",
 cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
-cl::Hidden, cl::init(0));
+cl::Hidden);
 
-static cl::opt
-ClWithIfunc("hwasan-with-ifunc",
-cl::desc("Access dynamic shadow through an ifunc global on "
- "platforms that support this"),
-cl::Hidden, cl::init(false));
-
-static cl::opt ClWithTls(
-"hwasan-with-tls",
-cl::desc("Access dynamic shadow through an thread-local pointer on "
- "platforms that support this"),
-cl::Hidden, cl::init(true));
+static cl::opt ClMappingOffsetDynamic(
+"hwasan-mapping-offset-dynamic",
+cl::desc("HWASan shadow mapping dynamic offset location"), cl::Hidden,
+cl::values(clEnumValN(OffsetKind::kGlobal, "global", "Use global"),
+   clEnumValN(OffsetKind::kIfunc, "ifunc", "Use ifunc global"),
+   clEnumValN(OffsetKind::kTls, "tls", "Use TLS")));
 
 static cl::opt ClHotPercentileCutoff("hwasan-percentile-cutoff-hot",
   cl::desc("Hot percentile cuttoff."));
@@ -399,12 +403,6 @@ class HWAddressSanitizer {
   /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
   /// ring buffer for storing stack allocations on targets that support it.
   class ShadowMapping {
-enum class OffsetKind {
-  kFixed = 0,
-  kGlobal,
-  kIfunc,
-  kTls,
-};
 OffsetKind Kind;
 uint64_t Offset;
 uint8_t Scale;
@@ -1941,11 +1939,8 @@ void HWAddressSanitizer::ShadowMapping::init(Triple 
&TargetTriple,
   } else if (ClEnableKhwasan || InstrumentWithCalls) {
 SetFixed(0);
 WithFrameRecord = false;
-  } else if (ClWithIfunc) {
-Kind = OffsetKind::kIfunc;
-WithFrameRecord = false;
-  } else if (!ClWithTls) {
-Kind = OffsetKind::kGlobal;
-WithFrameRecord = false;
+  } else if (ClMappingOffsetDynamic.getNumOccurrences() > 0) {
+Kind = ClMappingOffsetDynamic;
+WithFrameRecord = isInTls();
   }
 }
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll 
b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll
index 5fd9dc6eede211..24a89af97cffeb 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll
@@ -2,7 +2,7 @@
 ; Test alloca instrumentation. Command line includes check-globals so that
 ; changes to debug-info are detectable.
 ;
-; RUN: opt < %s -passes=hwasan -hwasan-with-ifunc=1 -S | FileCheck %s 
--check-prefixes=DYNAMIC-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S | 
FileCheck %s --check-prefixes=DYNAMIC-SHADOW
 ; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S | FileCheck %s 
--check-prefixes=ZERO-BASED-SHADOW
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/basic.ll 
b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/basic.ll
index 5415b081286631..e0eb1115

[llvm-branch-commits] [hwasan] Add "-hwasan-with-frame-record" (PR #109620)

2024-09-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Vitaly Buka (vitalybuka)


Changes

It should not be implied from mapping settings.


---
Full diff: https://github.com/llvm/llvm-project/pull/109620.diff


4 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (+7-2) 
- (modified) llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll 
(+2-2) 
- (modified) llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll (+4-4) 
- (modified) llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll (+2-2) 


``diff
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index a058357d7a4558..e386fa5d50b4d6 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -188,6 +188,11 @@ static cl::opt ClMappingOffsetDynamic(
clEnumValN(OffsetKind::kIfunc, "ifunc", "Use ifunc global"),
clEnumValN(OffsetKind::kTls, "tls", "Use TLS")));
 
+static cl::opt
+ClFrameRecords("hwasan-with-frame-record",
+   cl::desc("Use ring buffer for stack allocations"),
+   cl::Hidden);
+
 static cl::opt ClHotPercentileCutoff("hwasan-percentile-cutoff-hot",
   cl::desc("Hot percentile cuttoff."));
 
@@ -1935,12 +1940,12 @@ void HWAddressSanitizer::ShadowMapping::init(Triple 
&TargetTriple,
 SetFixed(0);
   } else if (ClMappingOffset.getNumOccurrences() > 0) {
 SetFixed(ClMappingOffset);
-WithFrameRecord = false;
   } else if (ClEnableKhwasan || InstrumentWithCalls) {
 SetFixed(0);
 WithFrameRecord = false;
   } else if (ClMappingOffsetDynamic.getNumOccurrences() > 0) {
 Kind = ClMappingOffsetDynamic;
-WithFrameRecord = isInTls();
   }
+
+  WithFrameRecord = optOr(ClFrameRecords, WithFrameRecord);
 }
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll 
b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll
index 24a89af97cffeb..edbcdbeb8516cd 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll
@@ -2,8 +2,8 @@
 ; Test alloca instrumentation. Command line includes check-globals so that
 ; changes to debug-info are detectable.
 ;
-; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S | 
FileCheck %s --check-prefixes=DYNAMIC-SHADOW
-; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S | FileCheck %s 
--check-prefixes=ZERO-BASED-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc 
-hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 
-hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "riscv64-unknown-linux"
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll 
b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll
index 4d0cce72470b96..451ab9ee184a3a 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll
@@ -2,11 +2,11 @@
 ; Test alloca instrumentation. Command line includes check-globals so that
 ; changes to debug-info are detectable.
 ;
-; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S | 
FileCheck %s --check-prefixes=DYNAMIC-SHADOW
-; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S | FileCheck %s 
--check-prefixes=ZERO-BASED-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc 
-hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=DYNAMIC-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 
-hwasan-with-frame-record=0 -S | FileCheck %s --check-prefixes=ZERO-BASED-SHADOW
 
-; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc -S 
--try-experimental-debuginfo-iterators | FileCheck %s 
--check-prefixes=DYNAMIC-SHADOW
-; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 -S 
--try-experimental-debuginfo-iterators | FileCheck %s 
--check-prefixes=ZERO-BASED-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset-dynamic=ifunc 
-hwasan-with-frame-record=0 -S --try-experimental-debuginfo-iterators | 
FileCheck %s --check-prefixes=DYNAMIC-SHADOW
+; RUN: opt < %s -passes=hwasan -hwasan-mapping-offset=0 
-hwasan-with-frame-record=0 -S --try-experimental-debuginfo-iterators | 
FileCheck %s --check-prefixes=ZERO-BASED-SHADOW
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android1"
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll 
b/llvm/test/Instrumentation/HWAddressSanitizer/prologue.ll
index 005a11b00c7a56..73fc077c956242 100644
--- a/llvm/test/Instrumentati

[llvm-branch-commits] [hwasan] Add "-hwasan-with-frame-record" (PR #109620)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka created 
https://github.com/llvm/llvm-project/pull/109620

It should not be implied from mapping settings.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][hwasan] Use `enum class` in `ShadowMapping` (PR #109617)

2024-09-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Vitaly Buka (vitalybuka)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/109617.diff


1 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 
(+30-43) 


``diff
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp 
b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index be0ead40b573d8..2efb97c8759bc9 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -64,6 +64,7 @@
 #include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include 
 #include 
 #include 
 
@@ -83,8 +84,6 @@ const char kHwasanShadowMemoryDynamicAddress[] =
 static const size_t kNumberOfAccessSizes = 5;
 
 static const size_t kDefaultShadowScale = 4;
-static const uint64_t kDynamicShadowSentinel =
-std::numeric_limits::max();
 
 static const unsigned kShadowBaseAlignment = 32;
 
@@ -385,44 +384,44 @@ class HWAddressSanitizer {
   std::unique_ptr Rng;
 
   /// This struct defines the shadow mapping using the rule:
+  /// If `kFixed`, then
   ///   shadow = (mem >> Scale) + Offset.
-  /// If InGlobal is true, then
+  /// If `kGlobal`, then
+  ///   extern char* __hwasan_shadow_memory_dynamic_address;
+  ///   shadow = (mem >> Scale) + __hwasan_shadow_memory_dynamic_address
+  /// If `kIfunc`, then
   ///   extern char __hwasan_shadow[];
   ///   shadow = (mem >> Scale) + &__hwasan_shadow
-  /// If InTls is true, then
+  /// If `kTls`, then
   ///   extern char *__hwasan_tls;
   ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
   ///
   /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
   /// ring buffer for storing stack allocations on targets that support it.
   class ShadowMapping {
-uint8_t Scale;
+enum class OffsetKind {
+  kFixed = 0,
+  kGlobal,
+  kIfunc,
+  kTls,
+};
+OffsetKind Kind;
 uint64_t Offset;
-bool InGlobal;
-bool InTls;
+uint8_t Scale;
 bool WithFrameRecord;
 
+void SetFixed(uint64_t O) {
+  Kind = OffsetKind::kFixed;
+  Offset = O;
+}
+
   public:
 void init(Triple &TargetTriple, bool InstrumentWithCalls);
 Align getObjectAlignment() const { return Align(1ULL << Scale); }
-bool isInGlobal() const {
-  return !InGlobal && !InTls && Offset == kDynamicShadowSentinel;
-}
-bool isInifunc() const {
-  assert(!InGlobal || !InTls);
-  assert(!InGlobal || Offset == kDynamicShadowSentinel);
-  return InGlobal;
-}
-bool isInTls() const {
-  assert(!InTls || !InGlobal);
-  assert(!InTls || Offset == kDynamicShadowSentinel);
-  return InTls;
-}
-bool isFixed() const {
-  assert(Offset == kDynamicShadowSentinel || !InTls);
-  assert(Offset == kDynamicShadowSentinel || !InGlobal);
-  return Offset != kDynamicShadowSentinel;
-}
+bool isInGlobal() const { return Kind == OffsetKind::kGlobal; }
+bool isInifunc() const { return Kind == OffsetKind::kIfunc; }
+bool isInTls() const { return Kind == OffsetKind::kTls; }
+bool isFixed() const { return Kind == OffsetKind::kFixed; }
 uint8_t scale() const { return Scale; };
 uint64_t offset() const {
   assert(isFixed());
@@ -1930,34 +1929,22 @@ void HWAddressSanitizer::ShadowMapping::init(Triple 
&TargetTriple,
   if (TargetTriple.isOSFuchsia()) {
 // Fuchsia is always PIE, which means that the beginning of the address
 // space is always available.
-InGlobal = false;
-InTls = false;
-Offset = 0;
+SetFixed(0);
 WithFrameRecord = true;
   } else if (ClMappingOffset.getNumOccurrences() > 0) {
-InGlobal = false;
-InTls = false;
-Offset = ClMappingOffset;
+SetFixed(ClMappingOffset);
 WithFrameRecord = false;
   } else if (ClEnableKhwasan || InstrumentWithCalls) {
-InGlobal = false;
-InTls = false;
-Offset = 0;
+SetFixed(0);
 WithFrameRecord = false;
   } else if (ClWithIfunc) {
-InGlobal = true;
-InTls = false;
-Offset = kDynamicShadowSentinel;
+Kind = OffsetKind::kIfunc;
 WithFrameRecord = false;
   } else if (ClWithTls) {
-InGlobal = false;
-InTls = true;
-Offset = kDynamicShadowSentinel;
+Kind = OffsetKind::kTls;
 WithFrameRecord = true;
   } else {
-InGlobal = false;
-InTls = false;
-Offset = kDynamicShadowSentinel;
+Kind = OffsetKind::kGlobal;
 WithFrameRecord = false;
   }
 }

``




https://github.com/llvm/llvm-project/pull/109617
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [hwasan] Replace "-hwasan-with-ifunc" and "-hwasan-with-tls" options (PR #109619)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka created 
https://github.com/llvm/llvm-project/pull/109619

The relationship between "-hwasan-mapping-offset",
"-hwasan-with-ifunc", and "-hwasan-with-tls" can
be too hard to understand.

Now we will have "-hwasan-mapping-offset", the
presence of which will imply a fixed shadow.

If present, "-hwasan-mapping-offset-dynamic" will select one
of the 3 available dynamic shadows.

As-is "-hwasan-mapping-offset" has precedence over
"-hwasan-mapping-offset-dynamic". In follow up
patches we need to use the one with last
occurrence.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][hwasan] Remove code duplication in ShadowMapping::init (PR #109618)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka created 
https://github.com/llvm/llvm-project/pull/109618

None


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][hwasan] Use `enum class` in `ShadowMapping` (PR #109617)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka created 
https://github.com/llvm/llvm-project/pull/109617

None


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoopPeel] Fix LCSSA phi node invalidation (PR #109624)

2024-09-23 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/109624

Backport 5bcc82d43388bb0daa122d5fe7ecda5eca27fc16

Requested by: @nikic

>From 9fbd83b1e5b61eb80ba8cef41726a62086ea0ac0 Mon Sep 17 00:00:00 2001
From: Nikita Popov 
Date: Fri, 20 Sep 2024 16:57:46 +0200
Subject: [PATCH] [LoopPeel] Fix LCSSA phi node invalidation

In the test case, the BECount of the second loop uses %load,
but we only have an LCSSA phi node for %add, so that is what
gets invalidated. Use the forgetLcssaPhiWithNewPredecessor()
API instead, which will invalidate the roots of the expression
instead.

Fixes https://github.com/llvm/llvm-project/issues/109333.

(cherry picked from commit 5bcc82d43388bb0daa122d5fe7ecda5eca27fc16)
---
 llvm/lib/Transforms/Utils/LoopPeel.cpp  |   2 +-
 llvm/test/Transforms/LoopUnroll/pr109333.ll | 104 
 2 files changed, 105 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/pr109333.ll

diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp 
b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 5d7c0d947facc4..760f1619e030c3 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -859,7 +859,7 @@ static void cloneLoopBlocks(
   if (LatchInst && L->contains(LatchInst))
 LatchVal = VMap[LatchVal];
   PHI.addIncoming(LatchVal, cast(VMap[Edge.first]));
-  SE.forgetValue(&PHI);
+  SE.forgetLcssaPhiWithNewPredecessor(L, &PHI);
 }
 
   // LastValueMap is updated with the values for the current loop
diff --git a/llvm/test/Transforms/LoopUnroll/pr109333.ll 
b/llvm/test/Transforms/LoopUnroll/pr109333.ll
new file mode 100644
index 00..f7ac911a78207a
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/pr109333.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -S -passes="print,loop-unroll" -unroll-runtime < 
%s 2>/dev/null | FileCheck %s
+
+; Make sure we use %add.lcssa rather than %load when expanding the
+; backedge taken count.
+
+define void @test(i1 %c, ptr %p) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:br label %[[LOOP_1_PEEL_BEGIN:.*]]
+; CHECK:   [[LOOP_1_PEEL_BEGIN]]:
+; CHECK-NEXT:br label %[[LOOP_1_PEEL:.*]]
+; CHECK:   [[LOOP_1_PEEL]]:
+; CHECK-NEXT:[[LOAD_PEEL:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:[[ADD_PEEL:%.*]] = add i64 [[LOAD_PEEL]], 1
+; CHECK-NEXT:br i1 [[C]], label %[[IF:.*]], label %[[LOOP_1_PEEL_NEXT:.*]]
+; CHECK:   [[LOOP_1_PEEL_NEXT]]:
+; CHECK-NEXT:br label %[[LOOP_1_PEEL_NEXT1:.*]]
+; CHECK:   [[LOOP_1_PEEL_NEXT1]]:
+; CHECK-NEXT:br label %[[ENTRY_PEEL_NEWPH:.*]]
+; CHECK:   [[ENTRY_PEEL_NEWPH]]:
+; CHECK-NEXT:br label %[[LOOP_1:.*]]
+; CHECK:   [[LOOP_1]]:
+; CHECK-NEXT:[[LOAD:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:[[ADD:%.*]] = add i64 [[LOAD]], 1
+; CHECK-NEXT:br i1 [[C]], label %[[IF_LOOPEXIT:.*]], label %[[LOOP_1]], 
!llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:   [[IF_LOOPEXIT]]:
+; CHECK-NEXT:[[ADD_LCSSA_PH:%.*]] = phi i64 [ [[ADD]], %[[LOOP_1]] ]
+; CHECK-NEXT:br label %[[IF]]
+; CHECK:   [[IF]]:
+; CHECK-NEXT:[[ADD_LCSSA:%.*]] = phi i64 [ [[ADD_PEEL]], %[[LOOP_1_PEEL]] 
], [ [[ADD_LCSSA_PH]], %[[IF_LOOPEXIT]] ]
+; CHECK-NEXT:[[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[ADD_LCSSA]]
+; CHECK-NEXT:[[TMP0:%.*]] = shl i64 [[ADD_LCSSA]], 3
+; CHECK-NEXT:[[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; CHECK-NEXT:[[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:[[XTRAITER:%.*]] = and i64 [[TMP2]], 7
+; CHECK-NEXT:[[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:br i1 [[LCMP_MOD]], label %[[LOOP_2_PROL_PREHEADER:.*]], 
label %[[LOOP_2_PROL_LOOPEXIT:.*]]
+; CHECK:   [[LOOP_2_PROL_PREHEADER]]:
+; CHECK-NEXT:br label %[[LOOP_2_PROL:.*]]
+; CHECK:   [[LOOP_2_PROL]]:
+; CHECK-NEXT:[[IV_PROL:%.*]] = phi ptr [ [[P]], %[[LOOP_2_PROL_PREHEADER]] 
], [ [[IV_NEXT_PROL:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:[[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_2_PROL_PREHEADER]] 
], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:[[IV_NEXT_PROL]] = getelementptr i8, ptr [[IV_PROL]], i64 8
+; CHECK-NEXT:[[ICMP_PROL:%.*]] = icmp eq ptr [[IV_PROL]], [[GEP]]
+; CHECK-NEXT:[[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:[[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], 
[[XTRAITER]]
+; CHECK-NEXT:br i1 [[PROL_ITER_CMP]], label %[[LOOP_2_PROL]], label 
%[[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:   [[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT:[[IV_UNR_PH:%.*]] = phi ptr [ [[IV_NEXT_PROL]], 
%[[LOOP_2_PROL]] ]
+; CHECK-NEXT:br label %[[LOOP_2_PROL_LOOPEXIT]]
+; CHECK:   [[LOOP_2_PROL_LOOPEXIT]]:
+; CHECK-NEXT:[[IV_UNR:%.*]] = phi ptr 

[llvm-branch-commits] [llvm] release/19.x: [LoopPeel] Fix LCSSA phi node invalidation (PR #109624)

2024-09-23 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: None (llvmbot)


Changes

Backport 5bcc82d43388bb0daa122d5fe7ecda5eca27fc16

Requested by: @nikic

---
Full diff: https://github.com/llvm/llvm-project/pull/109624.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Utils/LoopPeel.cpp (+1-1) 
- (added) llvm/test/Transforms/LoopUnroll/pr109333.ll (+104) 


``diff
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp 
b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 5d7c0d947facc4..760f1619e030c3 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -859,7 +859,7 @@ static void cloneLoopBlocks(
   if (LatchInst && L->contains(LatchInst))
 LatchVal = VMap[LatchVal];
   PHI.addIncoming(LatchVal, cast(VMap[Edge.first]));
-  SE.forgetValue(&PHI);
+  SE.forgetLcssaPhiWithNewPredecessor(L, &PHI);
 }
 
   // LastValueMap is updated with the values for the current loop
diff --git a/llvm/test/Transforms/LoopUnroll/pr109333.ll 
b/llvm/test/Transforms/LoopUnroll/pr109333.ll
new file mode 100644
index 00..f7ac911a78207a
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/pr109333.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt -S -passes="print,loop-unroll" -unroll-runtime < 
%s 2>/dev/null | FileCheck %s
+
+; Make sure we use %add.lcssa rather than %load when expanding the
+; backedge taken count.
+
+define void @test(i1 %c, ptr %p) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:br label %[[LOOP_1_PEEL_BEGIN:.*]]
+; CHECK:   [[LOOP_1_PEEL_BEGIN]]:
+; CHECK-NEXT:br label %[[LOOP_1_PEEL:.*]]
+; CHECK:   [[LOOP_1_PEEL]]:
+; CHECK-NEXT:[[LOAD_PEEL:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:[[ADD_PEEL:%.*]] = add i64 [[LOAD_PEEL]], 1
+; CHECK-NEXT:br i1 [[C]], label %[[IF:.*]], label %[[LOOP_1_PEEL_NEXT:.*]]
+; CHECK:   [[LOOP_1_PEEL_NEXT]]:
+; CHECK-NEXT:br label %[[LOOP_1_PEEL_NEXT1:.*]]
+; CHECK:   [[LOOP_1_PEEL_NEXT1]]:
+; CHECK-NEXT:br label %[[ENTRY_PEEL_NEWPH:.*]]
+; CHECK:   [[ENTRY_PEEL_NEWPH]]:
+; CHECK-NEXT:br label %[[LOOP_1:.*]]
+; CHECK:   [[LOOP_1]]:
+; CHECK-NEXT:[[LOAD:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT:[[ADD:%.*]] = add i64 [[LOAD]], 1
+; CHECK-NEXT:br i1 [[C]], label %[[IF_LOOPEXIT:.*]], label %[[LOOP_1]], 
!llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:   [[IF_LOOPEXIT]]:
+; CHECK-NEXT:[[ADD_LCSSA_PH:%.*]] = phi i64 [ [[ADD]], %[[LOOP_1]] ]
+; CHECK-NEXT:br label %[[IF]]
+; CHECK:   [[IF]]:
+; CHECK-NEXT:[[ADD_LCSSA:%.*]] = phi i64 [ [[ADD_PEEL]], %[[LOOP_1_PEEL]] 
], [ [[ADD_LCSSA_PH]], %[[IF_LOOPEXIT]] ]
+; CHECK-NEXT:[[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[ADD_LCSSA]]
+; CHECK-NEXT:[[TMP0:%.*]] = shl i64 [[ADD_LCSSA]], 3
+; CHECK-NEXT:[[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; CHECK-NEXT:[[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:[[XTRAITER:%.*]] = and i64 [[TMP2]], 7
+; CHECK-NEXT:[[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:br i1 [[LCMP_MOD]], label %[[LOOP_2_PROL_PREHEADER:.*]], 
label %[[LOOP_2_PROL_LOOPEXIT:.*]]
+; CHECK:   [[LOOP_2_PROL_PREHEADER]]:
+; CHECK-NEXT:br label %[[LOOP_2_PROL:.*]]
+; CHECK:   [[LOOP_2_PROL]]:
+; CHECK-NEXT:[[IV_PROL:%.*]] = phi ptr [ [[P]], %[[LOOP_2_PROL_PREHEADER]] 
], [ [[IV_NEXT_PROL:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:[[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_2_PROL_PREHEADER]] 
], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT:[[IV_NEXT_PROL]] = getelementptr i8, ptr [[IV_PROL]], i64 8
+; CHECK-NEXT:[[ICMP_PROL:%.*]] = icmp eq ptr [[IV_PROL]], [[GEP]]
+; CHECK-NEXT:[[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT:[[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], 
[[XTRAITER]]
+; CHECK-NEXT:br i1 [[PROL_ITER_CMP]], label %[[LOOP_2_PROL]], label 
%[[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:   [[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT:[[IV_UNR_PH:%.*]] = phi ptr [ [[IV_NEXT_PROL]], 
%[[LOOP_2_PROL]] ]
+; CHECK-NEXT:br label %[[LOOP_2_PROL_LOOPEXIT]]
+; CHECK:   [[LOOP_2_PROL_LOOPEXIT]]:
+; CHECK-NEXT:[[IV_UNR:%.*]] = phi ptr [ [[P]], %[[IF]] ], [ [[IV_UNR_PH]], 
%[[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:[[TMP3:%.*]] = icmp ult i64 [[TMP1]], 7
+; CHECK-NEXT:br i1 [[TMP3]], label %[[EXIT:.*]], label %[[IF_NEW:.*]]
+; CHECK:   [[IF_NEW]]:
+; CHECK-NEXT:br label %[[LOOP_2:.*]]
+; CHECK:   [[LOOP_2]]:
+; CHECK-NEXT:[[IV:%.*]] = phi ptr [ [[IV_UNR]], %[[IF_NEW]] ], [ 
[[IV_NEXT_7:%.*]], %[[LOOP_2]] ]
+; CHECK-NEXT:[[IV_NEXT:%.*]] = getelementptr i8, ptr [[IV]], i64 8
+; CHECK-NEXT:[[IV_NEXT_1:%.*]] = getelementptr i8, ptr [[IV_NEXT]], i64 8
+; CHECK-NEXT:[[IV_NE

[llvm-branch-commits] [llvm] release/19.x: [LoopPeel] Fix LCSSA phi node invalidation (PR #109624)

2024-09-23 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/109624
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [TySan] Fix struct access with different bases (PR #108385)

2024-09-23 Thread Tavian Barnes via llvm-branch-commits

https://github.com/tavianator approved this pull request.


https://github.com/llvm/llvm-project/pull/108385
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] 20f7777 - Revert "[MLIR][XeGPU] Updates XeGPU TensorDescAttr and Refine Gather/Scatter …"

2024-09-23 Thread via llvm-branch-commits

Author: Chao Chen
Date: 2024-09-23T09:03:53-05:00
New Revision: 20f5cd2516c1a0ef12d5f8a625b31b2448d5

URL: 
https://github.com/llvm/llvm-project/commit/20f5cd2516c1a0ef12d5f8a625b31b2448d5
DIFF: 
https://github.com/llvm/llvm-project/commit/20f5cd2516c1a0ef12d5f8a625b31b2448d5.diff

LOG: Revert "[MLIR][XeGPU] Updates XeGPU TensorDescAttr and Refine 
Gather/Scatter …"

This reverts commit 21627236363d629f6a5b820f45a6071371e4b8db.

Added: 


Modified: 
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUOps.td
mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td
mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
mlir/test/Dialect/XeGPU/XeGPUOps.mlir
mlir/test/Dialect/XeGPU/invalid.mlir

Removed: 




diff  --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td 
b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
index 26eec0d4f2082a..f3ca09a6a68ea8 100644
--- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
+++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -19,18 +19,12 @@ class XeGPUAttr traits = [],
   let mnemonic = attrMnemonic;
 }
 
-class XeGPU_TensorDescAttr 
traits = [],
- string baseCppClass = "::mlir::Attribute">
-: XeGPUAttr {
-  let assemblyFormat = "`<` struct(params) `>`";
-}
-
-def XeGPU_BlockTensorDescAttr: XeGPU_TensorDescAttr<"BlockTensorDesc", 
"block_tdesc_attr"> {
+def XeGPU_TensorDescAttr: XeGPUAttr<"TensorDesc", "tdesc_attr"> {
   let summary = [{a composite attribute for `TensorDescType`}];
-  let description = [{`BlockTensorDesc` (or `block_tdesc_attr`) is a composite
+  let description = [{`TensorDescAttr` (or `tdesc_attr`) is a composite
 attribute defined for `TensorDescType` for describing following
 properties of a `TensorDesc`.
-1. `memory_space`: It describes where the data block described by the
+1. `memory_scope`: It describes where the data block described by the
 TensorDesc is located, `Global` device memory or `Shared` local memory.
 It is default to `Global`.
 2. `array_length`: It describes how many horizontally consecutive blocks
@@ -39,63 +33,43 @@ def XeGPU_BlockTensorDescAttr: 
XeGPU_TensorDescAttr<"BlockTensorDesc", "block_td
 8x32. Its default value is 1.
 3. `boundary_check`: It is used to indicates the hardware whether to do
 out-of-boundary check. The default value is true.
+4. `scattered`: It is used to 
diff erenciate TensorDescs created from
+   `create_nd_tdesc` vs from `create_tdesc`.
   }];
 
   let parameters = (ins
-OptionalParameter<"MemorySpaceAttr">: $memory_space,
+OptionalParameter<"MemoryScopeAttr">: $memory_scope,
 OptionalParameter<"IntegerAttr", "1">: $array_length,
-OptionalParameter<"BoolAttr", "true">: $boundary_check
+OptionalParameter<"BoolAttr", "true">: $boundary_check,
+OptionalParameter<"BoolAttr", "false">: $scattered
   );
 
   let builders = [
 AttrBuilder<(ins
-  CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space,
+  CArg<"xegpu::MemoryScope", "xegpu::MemoryScope::Global">:$memory_scope,
   CArg<"int", "1">:$array_length,
-  CArg<"bool", "true">: $boundary_check
+  CArg<"bool", "true">: $boundary_check,
+  CArg<"bool", "false">: $scattered
 )>
   ];
 
+  let assemblyFormat = "`<` struct(params) `>`";
 }
 
-def XeGPU_ScatterTensorDescAttr: XeGPU_TensorDescAttr<"ScatterTensorDesc", 
"scatter_tdesc_attr"> {
-  let summary = [{a composite attribute for `TensorDescType`}];
-  let description = [{`ScatterTensorDesc` (or `scatter_tdesc_attr`) is a 
composite
-attribute defined for `TensorDescType` for describing following
-properties of a `TensorDesc`.
-1. `memory_space`: It describes where the data block described by the
-TensorDesc is located, `Global` device memory or `Shared` local memory.
-It is default to `Global`.
-2.  `chunk_size`: indicates number of continious elements accessed for each
-offset, default is 1. It is used with `scattered` attr only.
-  }];
-
-  let parameters = (ins
-OptionalParameter<"MemorySpaceAttr">: $memory_space,
-OptionalParameter<"IntegerAttr", "1">: $chunk_size
-  );
-
-  let builders = [
-AttrBuilder<(ins
-  CArg<"xegpu::MemorySpace", "xegpu::MemorySpace::Global">:$memory_space,
-  CArg<"int", "1">: $chunk_size
-)>
-  ];
- }
-
 
//===--===//
 // XeGPU Memory Scope Enums.
 
//===--===//
-def XeGPU_MemorySpaceGlobal: I32EnumAttrCase<"Global", 0, "global">;
-def XeGPU_MemorySpaceShared: I32EnumAttrCase<"SLM", 3, "slm">;
-def XeGPU_MemorySpace: I32EnumAttr<"MemorySpace",
+def XeGPU_MemoryScopeGlobal: I32EnumAttrCase<"Global", 0, "global">;
+def XeG

[llvm-branch-commits] [llvm] [CodeGen][NewPM] Port machine trace metrics analysis to new pass manager. (PR #108507)

2024-09-23 Thread Matt Arsenault via llvm-branch-commits


@@ -102,19 +103,22 @@ class MachineTraceMetrics : public MachineFunctionPass {
   TargetSchedModel SchedModel;
 
 public:
+  friend class MachineTraceMetricsWrapperPass;
   friend class Ensemble;
   friend class Trace;
 
   class Ensemble;
 
-  static char ID;
+  // For legacy pass.
+  MachineTraceMetrics() {
+std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
+  }

arsenm wrote:

So the Ensembles should just get = 0 initializer in the declaration? Should 
also probably be using unique_ptr 

https://github.com/llvm/llvm-project/pull/108507
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [hwasan] Consider order of mapping copts (PR #109621)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

vitalybuka wrote:

>  "Consider"?

Done



https://github.com/llvm/llvm-project/pull/109621
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [hwasan] Consider order of mapping copts (PR #109621)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka edited 
https://github.com/llvm/llvm-project/pull/109621
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-23 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109203

>From 2f34ca7929fd4040831d1ae51fd65420d5d2b630 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 18 Sep 2024 14:18:23 -0700
Subject: [PATCH] [ctx_prof] Automatically convert available external linkage
 to local for modules with contextual roots

---
 llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 -
 .../transform-to-local.ll   | 13 +++--
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp 
b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 2b34d3b5a56ea4..644effab9414ba 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
@@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) {
   ++NumConversions;
 }
 
-static bool eliminateAvailableExternally(Module &M) {
+static bool eliminateAvailableExternally(Module &M, bool Convert) {
   bool Changed = false;
 
   // Drop initializers of available externally global variables.
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
 if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
   continue;
 
-if (ConvertToLocal)
+if (Convert || ConvertToLocal)
   convertToLocalCopy(M, F);
 else
   deleteFunction(F);
@@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
-return PreservedAnalyses::all();
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult<CtxProfAnalysis>(M);
+  if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf
+;
+  return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git 
a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll 
b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c6..d0b96daf3bf3b1 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,11 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < 
%s | FileCheck %s
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile.ctxprofdata
+; RUN: opt -passes='assign-guid,require<ctx-prof-analysis>,elim-avail-extern' 
-use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
 
+; If the profile doesn't apply to this module, nothing gets converted.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require<ctx-prof-analysis>,elim-avail-extern' 
-use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s 
--check-prefix=NOOP
 
 declare void @call_out(ptr %fct)
 
@@ -12,13 +17,15 @@ define available_externally hidden void @g() {
   ret void
 }
 
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
   call void @f()
   %f = load ptr, ptr @f
   call void @call_out(ptr %f)
   ret void
 }
 
+!0 = !{i64 1234}
+
 ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
 ; CHECK: declare hidden void @g()
 ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +33,6 @@ define void @hello(ptr %g) {
 ; CHECK-NEXT: call void @call_out(ptr %f)
 ; CHECK: Statistics Collected
 ; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [ctx_prof] Automatically convert available external linkage to local for modules with contextual roots (PR #109203)

2024-09-23 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/109203

>From 4b6269df2546069716bf3c7cc566e7ef918a02c5 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 18 Sep 2024 14:18:23 -0700
Subject: [PATCH] [ctx_prof] Automatically convert available external linkage
 to local for modules with contextual roots

---
 llvm/lib/Transforms/IPO/ElimAvailExtern.cpp | 13 -
 .../transform-to-local.ll   | 13 +++--
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp 
b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 2b34d3b5a56ea4..644effab9414ba 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/IPO/ElimAvailExtern.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
@@ -88,7 +89,7 @@ static void convertToLocalCopy(Module &M, Function &F) {
   ++NumConversions;
 }
 
-static bool eliminateAvailableExternally(Module &M) {
+static bool eliminateAvailableExternally(Module &M, bool Convert) {
   bool Changed = false;
 
   // Drop initializers of available externally global variables.
@@ -112,7 +113,7 @@ static bool eliminateAvailableExternally(Module &M) {
 if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
   continue;
 
-if (ConvertToLocal)
+if (Convert || ConvertToLocal)
   convertToLocalCopy(M, F);
 else
   deleteFunction(F);
@@ -125,8 +126,10 @@ static bool eliminateAvailableExternally(Module &M) {
 }
 
 PreservedAnalyses
-EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
-  if (!eliminateAvailableExternally(M))
-return PreservedAnalyses::all();
+EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  auto *CtxProf = MAM.getCachedResult<CtxProfAnalysis>(M);
+  if (!eliminateAvailableExternally(M, (CtxProf && !!(*CtxProf
+;
+  return PreservedAnalyses::all();
   return PreservedAnalyses::none();
 }
diff --git 
a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll 
b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
index 786cc260d331c6..d0b96daf3bf3b1 100644
--- a/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
+++ b/llvm/test/Transforms/EliminateAvailableExternally/transform-to-local.ll
@@ -1,6 +1,11 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=elim-avail-extern -avail-extern-to-local -stats -S 2>&1 < 
%s | FileCheck %s
+; RUN: echo '[{"Guid":1234, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile.ctxprofdata
+; RUN: opt -passes='assign-guid,require<ctx-prof-analysis>,elim-avail-extern' 
-use-ctx-profile=%t_profile.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s
 
+; If the profile doesn't apply to this module, nothing gets converted.
+; RUN: echo '[{"Guid":5678, "Counters": [1]}]' | llvm-ctxprof-util fromJSON 
--input=- --output=%t_profile_bad.ctxprofdata
+; RUN: opt -passes='assign-guid,require<ctx-prof-analysis>,elim-avail-extern' 
-use-ctx-profile=%t_profile_bad.ctxprofdata -stats -S 2>&1 < %s | FileCheck %s 
--check-prefix=NOOP
 
 declare void @call_out(ptr %fct)
 
@@ -12,13 +17,15 @@ define available_externally hidden void @g() {
   ret void
 }
 
-define void @hello(ptr %g) {
+define void @hello(ptr %g) !guid !0 {
   call void @f()
   %f = load ptr, ptr @f
   call void @call_out(ptr %f)
   ret void
 }
 
+!0 = !{i64 1234}
+
 ; CHECK: define internal void @f.__uniq.{{[0-9|a-f]*}}()
 ; CHECK: declare hidden void @g()
 ; CHECK: call void @f.__uniq.{{[0-9|a-f]*}}()
@@ -26,4 +33,6 @@ define void @hello(ptr %g) {
 ; CHECK-NEXT: call void @call_out(ptr %f)
 ; CHECK: Statistics Collected
 ; CHECK: 1 elim-avail-extern - Number of functions converted
-; CHECK: 1 elim-avail-extern - Number of functions removed
\ No newline at end of file
+; CHECK: 1 elim-avail-extern - Number of functions removed
+
+; NOOP: 2 elim-avail-extern - Number of functions removed
\ No newline at end of file

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Expand flat atomics that may access private memory (PR #109407)

2024-09-23 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> Thanks. Can this be landed after #102462?

That was the plan 

https://github.com/llvm/llvm-project/pull/109407
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NVPTX] add support for encoding PTX registers for DWARF (PR #109495)

2024-09-23 Thread William G Hatch via llvm-branch-commits


@@ -141,3 +142,47 @@ NVPTXRegisterInfo::getFrameLocalRegister(const 
MachineFunction &MF) const {
   static_cast<const NVPTXTargetMachine &>(MF.getTarget());
   return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32;
 }
+
+void NVPTXRegisterInfo::clearDebugRegisterMap() const {
+  debugRegisterMap.clear();
+}
+
+static uint64_t encodeRegisterForDwarf(std::string registerName) {
+  if (registerName.length() > 8) {
+// The name is more than 8 characters long, and so won't fit into 64 bits.
+return 0;
+  }

willghatch wrote:

Actually, no, `%envreg` is long enough that it would only allow 1 digit.

https://github.com/llvm/llvm-project/pull/109495
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NVPTX] add support for encoding PTX registers for DWARF (PR #109495)

2024-09-23 Thread William G Hatch via llvm-branch-commits


@@ -141,3 +142,47 @@ NVPTXRegisterInfo::getFrameLocalRegister(const 
MachineFunction &MF) const {
   static_cast<const NVPTXTargetMachine &>(MF.getTarget());
   return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32;
 }
+
+void NVPTXRegisterInfo::clearDebugRegisterMap() const {
+  debugRegisterMap.clear();
+}
+
+static uint64_t encodeRegisterForDwarf(std::string registerName) {
+  if (registerName.length() > 8) {
+// The name is more than 8 characters long, and so won't fit into 64 bits.
+return 0;
+  }
+
+  // Encode the name string into a DWARF register number using cuda-gdb's
+  // encoding.  See cuda_check_dwarf2_reg_ptx_virtual_register in cuda-tdep.c,
+  // 
https://github.com/NVIDIA/cuda-gdb/blob/e5cf3bddae520ffb326f95b4d98ce5c7474b828b/gdb/cuda/cuda-tdep.c#L353
+  // IE the bytes of the string are concatenated in reverse into a single
+  // number, which is stored in ULEB128, but in practice must be no more than 8
+  // bytes (excluding null terminator, which is not included).
+  uint64_t result = 0;
+  for (int i = 0; i < registerName.length(); ++i) {
+result = result << 8;
+char c = registerName[i];

willghatch wrote:

Oops, yes, I forgot about char signedness.  In practice this only handles ASCII 
characters, so the high bit will always be zero.  But it should be unsigned.

https://github.com/llvm/llvm-project/pull/109495
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][hwasan] Use `enum class` in `ShadowMapping` (PR #109617)

2024-09-23 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer approved this pull request.


https://github.com/llvm/llvm-project/pull/109617
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [hwasan] Replace "-hwasan-with-ifunc" and "-hwasan-with-tls" options (PR #109619)

2024-09-23 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer approved this pull request.


https://github.com/llvm/llvm-project/pull/109619
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [NVPTX] add support for encoding PTX registers for DWARF (PR #109495)

2024-09-23 Thread William G Hatch via llvm-branch-commits


@@ -141,3 +142,47 @@ NVPTXRegisterInfo::getFrameLocalRegister(const 
MachineFunction &MF) const {
   static_cast<const NVPTXTargetMachine &>(MF.getTarget());
   return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32;
 }
+
+void NVPTXRegisterInfo::clearDebugRegisterMap() const {
+  debugRegisterMap.clear();
+}
+
+static uint64_t encodeRegisterForDwarf(std::string registerName) {
+  if (registerName.length() > 8) {
+// The name is more than 8 characters long, and so won't fit into 64 bits.
+return 0;
+  }

willghatch wrote:

I believe that the longest register name prefix in use is 4 characters, leaving 
4 more characters for the decimal number, meaning 10k registers.  So a name 
longer than 8 characters could just be an error, and probably should be.  10k 
registers ought to be enough for anyone.

https://github.com/llvm/llvm-project/pull/109495
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][hwasan] Use `enum class` in `ShadowMapping` (PR #109617)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka updated 
https://github.com/llvm/llvm-project/pull/109617


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][hwasan] Use `enum class` in `ShadowMapping` (PR #109617)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka updated 
https://github.com/llvm/llvm-project/pull/109617


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [hwasan] Add "-hwasan-with-frame-record" (PR #109620)

2024-09-23 Thread Florian Mayer via llvm-branch-commits

fmayer wrote:

Do you remember why this was implied from the mapping configuration in the 
first place?

https://github.com/llvm/llvm-project/pull/109620
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [hwasan] Add "-hwasan-with-frame-record" (PR #109620)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

vitalybuka wrote:

> Do you remember why this was implied from the mapping configuration in the 
> first place?

No.

https://github.com/llvm/llvm-project/pull/109620
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][hwasan] Remove code duplication in ShadowMapping::init (PR #109618)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka updated 
https://github.com/llvm/llvm-project/pull/109618


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [NFC][hwasan] Remove code duplication in ShadowMapping::init (PR #109618)

2024-09-23 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka updated 
https://github.com/llvm/llvm-project/pull/109618


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [bolt][tests] Skip tests that use perf when perf counters are unavailable (#107892) (PR #109072)

2024-09-23 Thread via llvm-branch-commits

github-actions[bot] wrote:

@tstellar (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109072
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [C++20] [Modules] Treat in class defined member functions in language linkage as implicitly inline (PR #109077)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

This looks safe - @ChuanqiXu9 can you approve if it looks alright to you?

https://github.com/llvm/llvm-project/pull/109077
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [C++20] [Modules] Treat constexpr/consteval member function as implicitly inline (PR #109076)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

@ChuanqiXu9 this looks safe enough to be picked. Does the PR look fine to you?

https://github.com/llvm/llvm-project/pull/109076
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 22139b3 - Reland [llvm-ml] Fix RIP-relative addressing for ptr operands (#108061)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

Author: Andrew Ng
Date: 2024-09-24T08:15:17+02:00
New Revision: 22139b36d7a81cd2dc08e9499a915def4aefc64e

URL: 
https://github.com/llvm/llvm-project/commit/22139b36d7a81cd2dc08e9499a915def4aefc64e
DIFF: 
https://github.com/llvm/llvm-project/commit/22139b36d7a81cd2dc08e9499a915def4aefc64e.diff

LOG: Reland [llvm-ml] Fix RIP-relative addressing for ptr operands (#108061)

Relands #107618 with fix for assertion triggered by OpenMP runtime MASM
assembly source.

(cherry picked from commit 7574e1ddc4be63628cb7617857cc8938058a79d2)

Added: 


Modified: 
llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
llvm/test/tools/llvm-ml/rip_relative_addressing.asm

Removed: 




diff  --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp 
b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c7f88fed9b128b..efbcb57add98c9 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2707,7 +2707,8 @@ bool X86AsmParser::parseIntelOperand(OperandVector 
&Operands, StringRef Name) {
   bool MaybeDirectBranchDest = true;
 
   if (Parser.isParsingMasm()) {
-if (is64BitMode() && SM.getElementSize() > 0) {
+if (is64BitMode() &&
+((PtrInOperand && !IndexReg) || SM.getElementSize() > 0)) {
   DefaultBaseReg = X86::RIP;
 }
 if (IsUnconditionalBranch) {

diff  --git a/llvm/test/tools/llvm-ml/rip_relative_addressing.asm 
b/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
index d237e84435b7d6..c005b9721c07e0 100644
--- a/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
+++ b/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
@@ -53,4 +53,14 @@ mov eax, [t8]
 ; CHECK-LABEL: t8:
 ; CHECK: mov eax, dword ptr [t8]
 
-END
\ No newline at end of file
+t9:
+mov eax, dword ptr [bar]
+; CHECK-LABEL: t9:
+; CHECK-32: mov eax, dword ptr [bar]
+; CHECK-64: mov eax, dword ptr [rip + bar]
+
+t10:
+mov ebx, dword ptr [4*eax]
+; CHECK: mov ebx, dword ptr [4*eax]
+
+END



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: Reland [llvm-ml] Fix RIP-relative addressing for ptr operands (#108061) (PR #109091)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109091

>From 22139b36d7a81cd2dc08e9499a915def4aefc64e Mon Sep 17 00:00:00 2001
From: Andrew Ng 
Date: Fri, 13 Sep 2024 12:19:42 +0100
Subject: [PATCH] Reland [llvm-ml] Fix RIP-relative addressing for ptr operands
 (#108061)

Relands #107618 with fix for assertion triggered by OpenMP runtime MASM
assembly source.

(cherry picked from commit 7574e1ddc4be63628cb7617857cc8938058a79d2)
---
 llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp  |  3 ++-
 llvm/test/tools/llvm-ml/rip_relative_addressing.asm | 12 +++-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp 
b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c7f88fed9b128b..efbcb57add98c9 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2707,7 +2707,8 @@ bool X86AsmParser::parseIntelOperand(OperandVector 
&Operands, StringRef Name) {
   bool MaybeDirectBranchDest = true;
 
   if (Parser.isParsingMasm()) {
-if (is64BitMode() && SM.getElementSize() > 0) {
+if (is64BitMode() &&
+((PtrInOperand && !IndexReg) || SM.getElementSize() > 0)) {
   DefaultBaseReg = X86::RIP;
 }
 if (IsUnconditionalBranch) {
diff --git a/llvm/test/tools/llvm-ml/rip_relative_addressing.asm 
b/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
index d237e84435b7d6..c005b9721c07e0 100644
--- a/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
+++ b/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
@@ -53,4 +53,14 @@ mov eax, [t8]
 ; CHECK-LABEL: t8:
 ; CHECK: mov eax, dword ptr [t8]
 
-END
\ No newline at end of file
+t9:
+mov eax, dword ptr [bar]
+; CHECK-LABEL: t9:
+; CHECK-32: mov eax, dword ptr [bar]
+; CHECK-64: mov eax, dword ptr [rip + bar]
+
+t10:
+mov ebx, dword ptr [4*eax]
+; CHECK: mov ebx, dword ptr [4*eax]
+
+END

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [C++20] [Modules] Treat in class defined member functions in language linkage as implicitly inline (PR #109077)

2024-09-23 Thread Chuanqi Xu via llvm-branch-commits

https://github.com/ChuanqiXu9 approved this pull request.


https://github.com/llvm/llvm-project/pull/109077
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: Reland [llvm-ml] Fix RIP-relative addressing for ptr operands (#108061) (PR #109091)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/109091
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [C++20] [Modules] Treat constexpr/consteval member function as implicitly inline (PR #109076)

2024-09-23 Thread Chuanqi Xu via llvm-branch-commits

ChuanqiXu9 wrote:

> @ChuanqiXu9 this looks safe enough to be picked. Does the PR look fine to you?

Yes, I'll try to approve it formally.

https://github.com/llvm/llvm-project/pull/109076
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [C++20] [Modules] Treat constexpr/consteval member function as implicitly inline (PR #109076)

2024-09-23 Thread Chuanqi Xu via llvm-branch-commits

https://github.com/ChuanqiXu9 approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/109076
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109110

>From 910dde5780f9e8f3a16612bd89b512b2594e9ab7 Mon Sep 17 00:00:00 2001
From: hev 
Date: Sat, 14 Sep 2024 11:19:34 +0800
Subject: [PATCH] [LoongArch][sanitizer] Fix SC_ADDRERR_{RD,WR} missing in the
 musl environment (#108557)

Fixes #108550

(cherry picked from commit 1825cf28dc83113200b623ebcf063eea35ade79a)
---
 compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp | 12 
 1 file changed, 12 insertions(+)

diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 648df0c4e5a760..b9b1f496df7c98 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -2014,6 +2014,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() 
const {
 return Unknown;
   return esr & ESR_ELx_WNR ? Write : Read;
 #  elif defined(__loongarch__)
+  // In the musl environment, the Linux kernel uapi sigcontext.h is not
+  // included in signal.h. To avoid missing the SC_ADDRERR_{RD,WR} macros,
+  // copy them here. The LoongArch Linux kernel uapi is already stable,
+  // so there's no need to worry about the value changing.
+#ifndef SC_ADDRERR_RD
+  // Address error was due to memory load
+#  define SC_ADDRERR_RD (1 << 30)
+#endif
+#ifndef SC_ADDRERR_WR
+  // Address error was due to memory store
+#  define SC_ADDRERR_WR (1 << 31)
+#endif
   u32 flags = ucontext->uc_mcontext.__flags;
   if (flags & SC_ADDRERR_RD)
 return SignalContext::Read;

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Looks safe enough.

https://github.com/llvm/llvm-project/pull/109110
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] 910dde5 - [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

Author: hev
Date: 2024-09-24T08:16:20+02:00
New Revision: 910dde5780f9e8f3a16612bd89b512b2594e9ab7

URL: 
https://github.com/llvm/llvm-project/commit/910dde5780f9e8f3a16612bd89b512b2594e9ab7
DIFF: 
https://github.com/llvm/llvm-project/commit/910dde5780f9e8f3a16612bd89b512b2594e9ab7.diff

LOG: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD,WR} missing in the musl 
environment (#108557)

Fixes #108550

(cherry picked from commit 1825cf28dc83113200b623ebcf063eea35ade79a)

Added: 


Modified: 
compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp

Removed: 




diff  --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp 
b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 648df0c4e5a760..b9b1f496df7c98 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -2014,6 +2014,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() 
const {
 return Unknown;
   return esr & ESR_ELx_WNR ? Write : Read;
 #  elif defined(__loongarch__)
+  // In the musl environment, the Linux kernel uapi sigcontext.h is not
+  // included in signal.h. To avoid missing the SC_ADDRERR_{RD,WR} macros,
+  // copy them here. The LoongArch Linux kernel uapi is already stable,
+  // so there's no need to worry about the value changing.
+#ifndef SC_ADDRERR_RD
+  // Address error was due to memory load
+#  define SC_ADDRERR_RD (1 << 30)
+#endif
+#ifndef SC_ADDRERR_WR
+  // Address error was due to memory store
+#  define SC_ADDRERR_WR (1 << 31)
+#endif
   u32 flags = ucontext->uc_mcontext.__flags;
   if (flags & SC_ADDRERR_RD)
 return SignalContext::Read;



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/109110
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: Reland [llvm-ml] Fix RIP-relative addressing for ptr operands (#108061) (PR #109091)

2024-09-23 Thread via llvm-branch-commits

github-actions[bot] wrote:

@mstorsjo (or anyone else): if you would like to add a note about this fix in 
the release notes (completely optional), please reply to this comment with a 
one- or two-sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109091
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Hi, since we are wrapping up this next release we are very strict with the 
fixes we pick at this point. Can you please respond to the following questions 
to help me understand if this has to be included in the final release or not.

Is this PR a fix for a regression or a critical issue?

What is the risk of accepting this into the release branch?

What is the risk of NOT accepting this into the release branch?



https://github.com/llvm/llvm-project/pull/109125
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] release/19.x: [LoongArch][sanitizer] Fix SC_ADDRERR_{RD, WR} missing in the musl environment (#108557) (PR #109110)

2024-09-23 Thread via llvm-branch-commits

github-actions[bot] wrote:

@heiher (or anyone else): if you would like to add a note about this fix in the 
release notes (completely optional), please reply to this comment with a one- or 
two-sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109110
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Follow-up to "Poison Pills are Too Toxic" (PR #109291)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109291

>From 57b439c546276719dfb63e172014aa172987 Mon Sep 17 00:00:00 2001
From: Jakub Mazurkiewicz 
Date: Wed, 10 Apr 2024 23:12:22 +0200
Subject: [PATCH 1/2] [libc++] Follow-up to "Poison Pills are Too Toxic"

* Update release notes and `Cxx23.html`
* Update `__cpp_lib_ranges` feature test macro
---
 libcxx/docs/FeatureTestMacroTable.rst| 2 ++
 libcxx/docs/ReleaseNotes/19.rst  | 1 +
 libcxx/docs/Status/Cxx23.rst | 1 +
 libcxx/docs/Status/Cxx23Papers.csv   | 2 +-
 libcxx/include/version   | 5 -
 .../algorithm.version.compile.pass.cpp   | 9 +
 .../functional.version.compile.pass.cpp  | 9 +
 .../iterator.version.compile.pass.cpp| 9 +
 .../memory.version.compile.pass.cpp  | 9 +
 .../ranges.version.compile.pass.cpp  | 9 +
 .../version.version.compile.pass.cpp | 9 +
 libcxx/utils/generate_feature_test_macro_components.py   | 1 +
 12 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/libcxx/docs/FeatureTestMacroTable.rst 
b/libcxx/docs/FeatureTestMacroTable.rst
index a1506e115fe70f..7f95f0f4e1c17c 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -350,6 +350,8 @@ Status
 -- 
-
 ``__cpp_lib_print````202207L``
 -- 
-
+``__cpp_lib_ranges``   ``202211L``
+-- 
-
 ``__cpp_lib_ranges_as_const``  *unimplemented*
 -- 
-
 ``__cpp_lib_ranges_as_rvalue`` ``202207L``
diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index 92896f6b0d11e7..26210ddb274e5f 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -77,6 +77,7 @@ Implemented Papers
 - P2602R2 - Poison Pills are Too Toxic
 - P1981R0 - Rename ``leap`` to ``leap_second``
 - P1982R0 - Rename ``link`` to ``time_zone_link``
+- P2602R2 - Poison Pills are Too Toxic (as DR against C++20)
 
 
 Improvements and New Features
diff --git a/libcxx/docs/Status/Cxx23.rst b/libcxx/docs/Status/Cxx23.rst
index 23d30c8128d71e..8c1cae8b3e3b2f 100644
--- a/libcxx/docs/Status/Cxx23.rst
+++ b/libcxx/docs/Status/Cxx23.rst
@@ -44,6 +44,7 @@ Paper Status
.. [#note-P1413R3] P1413R3: ``std::aligned_storage_t`` and 
``std::aligned_union_t`` are marked deprecated, but
   clang doesn't issue a diagnostic for deprecated using template 
declarations.
.. [#note-P2520R0] P2520R0: Libc++ implemented this paper as a DR in C++20 
as well.
+   .. [#note-P2602R2] P2602R2: Libc++ implemented this paper as a DR in C++20 
as well.
.. [#note-P2711R1] P2711R1: ``join_with_view`` hasn't been done yet since 
this type isn't implemented yet.
.. [#note-P2770R0] P2770R0: ``join_with_view`` hasn't been done yet since 
this type isn't implemented yet.
.. [#note-P2693R1] P2693R1: The formatter for ``std::thread::id`` is 
implemented.
diff --git a/libcxx/docs/Status/Cxx23Papers.csv 
b/libcxx/docs/Status/Cxx23Papers.csv
index 92f4908487ae72..f46bb844532029 100644
--- a/libcxx/docs/Status/Cxx23Papers.csv
+++ b/libcxx/docs/Status/Cxx23Papers.csv
@@ -100,7 +100,7 @@
 "`P2396R1 `__","LWG", "Concurrency TS 2 fixes ", 
"November 2022","","","|concurrency TS|"
 "`P2505R5 `__","LWG", "Monadic Functions for 
``std::expected``", "November 2022","|Complete|","17.0",""
 "`P2539R4 `__","LWG", "Should the output of 
``std::print`` to a terminal be synchronized with the underlying stream?", 
"November 2022","|Complete|","18.0","|format|"
-"`P2602R2 `__","LWG", "Poison Pills are Too Toxic", 
"November 2022","|Complete|","19.0","|ranges|"
+"`P2602R2 `__","LWG", "Poison Pills are Too Toxic", 
"November 2022","|Complete| [#note-P2602R2]_","19.0","|ranges| |DR|"
 "`P2708R1 `__","LWG", "No Further Fundamentals 
TSes", "November 2022","|Nothing to do|","",""
 "","","","","","",""
 "`P0290R4 `__","LWG", "``apply()`` for 
``synchronized_value``","February 2023","","","|concurrency TS|"
diff --git a/libcxx/include/version b/libcxx/include/version
index fe64343eafbc9c..c8a31f77a915e1 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -182,8 +182,9 @@ __cpp_lib_philox_engine

[llvm-branch-commits] [lld] release/19.x: [ELF] --icf: don't fold a section without relocation and a section with relocations for SHT_CREL (PR #109309)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109309

>From 1720219a1dea5c9b99df6dd1fdfb9dc8e77054fd Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Wed, 18 Sep 2024 23:06:12 -0700
Subject: [PATCH] [ELF] --icf: don't fold a section without relocation and a
 section with relocations for SHT_CREL

Similar to commit 686cff17cc310884e48ae963bf7507f96950cc90 for SHT_REL (#57693).
CREL hasn't been tested with ICF before.

And avoid a pitfall that eqClass[0] might interfere with ICF.

(cherry picked from commit e82f0838ae88ad69515ebec234765e3e2607bebf)
---
 lld/ELF/ICF.cpp  | 4 ++--
 lld/ELF/InputSection.cpp | 6 +++---
 lld/ELF/InputSection.h   | 4 
 lld/test/ELF/icf10.s | 3 +++
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 44e8a71cc62869..5591c5e71e0b1a 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -324,7 +324,7 @@ bool ICF::equalsConstant(const InputSection *a, const 
InputSection *b) {
 
   const RelsOrRelas ra = a->template relsOrRelas();
   const RelsOrRelas rb = b->template relsOrRelas();
-  if (ra.areRelocsCrel())
+  if (ra.areRelocsCrel() || rb.areRelocsCrel())
 return constantEq(a, ra.crels, b, rb.crels);
   return ra.areRelocsRel() || rb.areRelocsRel()
  ? constantEq(a, ra.rels, b, rb.rels)
@@ -376,7 +376,7 @@ template 
 bool ICF::equalsVariable(const InputSection *a, const InputSection *b) {
   const RelsOrRelas ra = a->template relsOrRelas();
   const RelsOrRelas rb = b->template relsOrRelas();
-  if (ra.areRelocsCrel())
+  if (ra.areRelocsCrel() || rb.areRelocsCrel())
 return variableEq(a, ra.crels, b, rb.crels);
   return ra.areRelocsRel() || rb.areRelocsRel()
  ? variableEq(a, ra.rels, b, rb.rels)
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 570e485455bade..a165c813d4259c 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -150,12 +150,12 @@ RelsOrRelas InputSectionBase::relsOrRelas(bool 
supportsCrel) const {
 InputSectionBase *const &relSec = f->getSections()[relSecIdx];
 // Otherwise, allocate a buffer to hold the decoded RELA relocations. When
 // called for the first time, relSec is null (without --emit-relocs) or an
-// InputSection with zero eqClass[0].
-if (!relSec || !cast(relSec)->eqClass[0]) {
+// InputSection with false decodedCrel.
+if (!relSec || !cast(relSec)->decodedCrel) {
   auto *sec = makeThreadLocal(*f, shdr, name);
   f->cacheDecodedCrel(relSecIdx, sec);
   sec->type = SHT_RELA;
-  sec->eqClass[0] = SHT_RELA;
+  sec->decodedCrel = true;
 
   RelocsCrel entries(sec->content_);
   sec->size = entries.size() * sizeof(typename ELFT::Rela);
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 6659530a9c9c26..afa6ee5bd0826f 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -176,6 +176,10 @@ class InputSectionBase : public SectionBase {
 
   mutable bool compressed = false;
 
+  // Whether this section is SHT_CREL and has been decoded to RELA by
+  // relsOrRelas.
+  bool decodedCrel = false;
+
   // Whether the section needs to be padded with a NOP filler due to
   // deleteFallThruJmpInsn.
   bool nopFiller = false;
diff --git a/lld/test/ELF/icf10.s b/lld/test/ELF/icf10.s
index 3c18c431c3b9da..ff926d0e16b103 100644
--- a/lld/test/ELF/icf10.s
+++ b/lld/test/ELF/icf10.s
@@ -5,6 +5,9 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-freebsd %s -o %t.o
 # RUN: ld.lld --icf=all %t.o -o /dev/null --print-icf-sections 2>&1 | 
FileCheck %s
 
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o --crel
+# RUN: ld.lld --icf=all %t.o -o /dev/null --print-icf-sections 2>&1 | 
FileCheck %s
+
 # Checks that ICF does not merge 2 sections the offset of
 # the relocations of which differ.
 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Follow-up to "Poison Pills are Too Toxic" (PR #109291)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Rebasing since we have several CI failures.

https://github.com/llvm/llvm-project/pull/109291
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/19.x: [ELF] --icf: don't fold a section without relocation and a section with relocations for SHT_CREL (PR #109309)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/109309
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Adjust the version of __cpp_lib_ranges in C++20 mode (PR #109324)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109324

>From bb606ca6bff4555ffc2d7bd81edfc85434937141 Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Thu, 19 Sep 2024 15:40:47 -0400
Subject: [PATCH] [libc++] Adjust the version of __cpp_lib_ranges in C++20 mode

This is a (very partial) cherry-pick of #101715 to fix this oversight
in the LLVM 19 release.
---
 libcxx/docs/FeatureTestMacroTable.rst  |  2 +-
 libcxx/include/version |  4 ++--
 .../algorithm.version.compile.pass.cpp | 14 +++---
 .../functional.version.compile.pass.cpp| 14 +++---
 .../iterator.version.compile.pass.cpp  | 14 +++---
 .../memory.version.compile.pass.cpp| 14 +++---
 .../ranges.version.compile.pass.cpp| 14 +++---
 .../version.version.compile.pass.cpp   | 14 +++---
 .../generate_feature_test_macro_components.py  |  2 +-
 9 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/libcxx/docs/FeatureTestMacroTable.rst 
b/libcxx/docs/FeatureTestMacroTable.rst
index a1506e115fe70f..4d532e2bd7319e 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -266,7 +266,7 @@ Status
 -- 
-
 ``__cpp_lib_polymorphic_allocator````201902L``
 -- 
-
-``__cpp_lib_ranges``   ``202207L``
+``__cpp_lib_ranges``   ``202110L``
 -- 
-
 ``__cpp_lib_remove_cvref`` ``201711L``
 -- 
-
diff --git a/libcxx/include/version b/libcxx/include/version
index fe64343eafbc9c..f53557bbe23d8a 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -182,7 +182,7 @@ __cpp_lib_philox_engine 
202406L 
 __cpp_lib_polymorphic_allocator 201902L 

 __cpp_lib_print 202207L  

 __cpp_lib_quoted_string_io  201304L 
-__cpp_lib_ranges202207L  
 
+__cpp_lib_ranges202110L  
 
  

 __cpp_lib_ranges_as_const   202207L 
 __cpp_lib_ranges_as_rvalue  202207L 
@@ -428,7 +428,7 @@ __cpp_lib_void_t
201411L 
 # if _LIBCPP_AVAILABILITY_HAS_PMR
 #   define __cpp_lib_polymorphic_allocator  201902L
 # endif
-# define __cpp_lib_ranges   202207L
+# define __cpp_lib_ranges   202110L
 # define __cpp_lib_remove_cvref 201711L
 # if !defined(_LIBCPP_HAS_NO_THREADS) && _LIBCPP_AVAILABILITY_HAS_SYNC
 #   define __cpp_lib_semaphore  201907L
diff --git 
a/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp
 
b/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp
index ded80060632419..932dea1c5cf007 100644
--- 
a/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp
+++ 
b/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp
@@ -21,7 +21,7 @@
 __cpp_lib_default_template_type_for_algorithm_values202403L [C++26]
 __cpp_lib_freestanding_algorithm202311L [C++26]
 __cpp_lib_parallel_algorithm201603L [C++17]
-__cpp_lib_ranges202207L [C++20]
+__cpp_lib_ranges202110L [C++20]
 __cpp_lib_ranges_contains   202207L [C++23]
 __cpp_lib_ranges_find_last  202207L [C++23]
 __cpp_lib_ranges_starts_ends_with   202106L [C++23]
@@ -244,8 +244,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++20"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++20"
+# if __cpp_lib_ranges != 202110L
+#   error "__cpp_lib_ranges should have the value 202110L in c++20"
 # endif
 
 # ifdef __cpp_lib_ranges_contains
@@ -321,8 +321,8 @@
 # ifndef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should be defined in c++23"
 # endif
-# if __cpp_lib_ranges != 202207L
-#   error "__cpp_lib_ranges should have the value 202207L in c++23"
+# if __cpp_lib_ranges !=

[llvm-branch-commits] [lld] 1720219 - [ELF] --icf: don't fold a section without relocation and a section with relocations for SHT_CREL

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

Author: Fangrui Song
Date: 2024-09-24T08:19:09+02:00
New Revision: 1720219a1dea5c9b99df6dd1fdfb9dc8e77054fd

URL: 
https://github.com/llvm/llvm-project/commit/1720219a1dea5c9b99df6dd1fdfb9dc8e77054fd
DIFF: 
https://github.com/llvm/llvm-project/commit/1720219a1dea5c9b99df6dd1fdfb9dc8e77054fd.diff

LOG: [ELF] --icf: don't fold a section without relocation and a section with 
relocations for SHT_CREL

Similar to commit 686cff17cc310884e48ae963bf7507f96950cc90 for SHT_REL (#57693).
CREL hasn't been tested with ICF before.

And avoid a pitfall that eqClass[0] might interfere with ICF.

(cherry picked from commit e82f0838ae88ad69515ebec234765e3e2607bebf)

Added: 


Modified: 
lld/ELF/ICF.cpp
lld/ELF/InputSection.cpp
lld/ELF/InputSection.h
lld/test/ELF/icf10.s

Removed: 




diff  --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index 44e8a71cc62869..5591c5e71e0b1a 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -324,7 +324,7 @@ bool ICF::equalsConstant(const InputSection *a, const 
InputSection *b) {
 
   const RelsOrRelas ra = a->template relsOrRelas();
   const RelsOrRelas rb = b->template relsOrRelas();
-  if (ra.areRelocsCrel())
+  if (ra.areRelocsCrel() || rb.areRelocsCrel())
 return constantEq(a, ra.crels, b, rb.crels);
   return ra.areRelocsRel() || rb.areRelocsRel()
  ? constantEq(a, ra.rels, b, rb.rels)
@@ -376,7 +376,7 @@ template 
 bool ICF::equalsVariable(const InputSection *a, const InputSection *b) {
   const RelsOrRelas ra = a->template relsOrRelas();
   const RelsOrRelas rb = b->template relsOrRelas();
-  if (ra.areRelocsCrel())
+  if (ra.areRelocsCrel() || rb.areRelocsCrel())
 return variableEq(a, ra.crels, b, rb.crels);
   return ra.areRelocsRel() || rb.areRelocsRel()
  ? variableEq(a, ra.rels, b, rb.rels)

diff  --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 570e485455bade..a165c813d4259c 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -150,12 +150,12 @@ RelsOrRelas InputSectionBase::relsOrRelas(bool 
supportsCrel) const {
 InputSectionBase *const &relSec = f->getSections()[relSecIdx];
 // Otherwise, allocate a buffer to hold the decoded RELA relocations. When
 // called for the first time, relSec is null (without --emit-relocs) or an
-// InputSection with zero eqClass[0].
-if (!relSec || !cast(relSec)->eqClass[0]) {
+// InputSection with false decodedCrel.
+if (!relSec || !cast(relSec)->decodedCrel) {
   auto *sec = makeThreadLocal(*f, shdr, name);
   f->cacheDecodedCrel(relSecIdx, sec);
   sec->type = SHT_RELA;
-  sec->eqClass[0] = SHT_RELA;
+  sec->decodedCrel = true;
 
   RelocsCrel entries(sec->content_);
   sec->size = entries.size() * sizeof(typename ELFT::Rela);

diff  --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 6659530a9c9c26..afa6ee5bd0826f 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -176,6 +176,10 @@ class InputSectionBase : public SectionBase {
 
   mutable bool compressed = false;
 
+  // Whether this section is SHT_CREL and has been decoded to RELA by
+  // relsOrRelas.
+  bool decodedCrel = false;
+
   // Whether the section needs to be padded with a NOP filler due to
   // deleteFallThruJmpInsn.
   bool nopFiller = false;

diff  --git a/lld/test/ELF/icf10.s b/lld/test/ELF/icf10.s
index 3c18c431c3b9da..ff926d0e16b103 100644
--- a/lld/test/ELF/icf10.s
+++ b/lld/test/ELF/icf10.s
@@ -5,6 +5,9 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-freebsd %s -o %t.o
 # RUN: ld.lld --icf=all %t.o -o /dev/null --print-icf-sections 2>&1 | 
FileCheck %s
 
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o --crel
+# RUN: ld.lld --icf=all %t.o -o /dev/null --print-icf-sections 2>&1 | 
FileCheck %s
+
 # Checks that ICF does not merge 2 sections the offset of
 # the relocations of which differ.
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/19.x: [ELF] --icf: don't fold a section without relocation and a section with relocations for SHT_CREL (PR #109309)

2024-09-23 Thread via llvm-branch-commits

github-actions[bot] wrote:

@MaskRay (or anyone else). If you would like to add a note about this fix in 
the release notes (completely optional). Please reply to this comment with a 
one or two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109309
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [X86][APX] Fix wrong encoding of promoted KMOV instructions due to missing NoCD8 (#109579) (PR #109635)

2024-09-23 Thread Simon Pilgrim via llvm-branch-commits

https://github.com/RKSimon approved this pull request.


https://github.com/llvm/llvm-project/pull/109635
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [AA] Take account of C++23's stricter rules for forward declarations (NFC) (#109416) (PR #109476)

2024-09-23 Thread via llvm-branch-commits

github-actions[bot] wrote:

@nikic (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109476
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [DAGCombiner] cache negative result from getMergeStoreCandidates() (#106949) (PR #108397)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/108397

>From b91df284f019fd00c9d0ae64a6ce815872071d8e Mon Sep 17 00:00:00 2001
From: Princeton Ferro 
Date: Wed, 4 Sep 2024 07:18:53 -0700
Subject: [PATCH] [DAGCombiner] cache negative result from
 getMergeStoreCandidates() (#106949)

Cache negative search result from getStoreMergeCandidates() so that
mergeConsecutiveStores() does not iterate quadratically over a
potentially long sequence of unmergeable stores.

(cherry picked from commit 8f77d37f256809766fd83a09c6d144b785e9165a)
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 83 ---
 1 file changed, 51 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 71cdec91e5f67a..7b1f1dc40211d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -191,6 +191,11 @@ namespace {
 // AA - Used for DAG load/store alias analysis.
 AliasAnalysis *AA;
 
+/// This caches all chains that have already been processed in
+/// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
+/// stores candidates.
+SmallPtrSet ChainsWithoutMergeableStores;
+
 /// When an instruction is simplified, add all users of the instruction to
 /// the work lists because they might get more simplified now.
 void AddUsersToWorklist(SDNode *N) {
@@ -776,11 +781,10 @@ namespace {
  bool UseTrunc);
 
 /// This is a helper function for mergeConsecutiveStores. Stores that
-/// potentially may be merged with St are placed in StoreNodes. RootNode is
-/// a chain predecessor to all store candidates.
-void getStoreMergeCandidates(StoreSDNode *St,
- SmallVectorImpl &StoreNodes,
- SDNode *&Root);
+/// potentially may be merged with St are placed in StoreNodes. On success,
+/// returns a chain predecessor to all store candidates.
+SDNode *getStoreMergeCandidates(StoreSDNode *St,
+SmallVectorImpl &StoreNodes);
 
 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
 /// have indirect dependency through their operands. RootNode is the
@@ -1782,6 +1786,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
 
 ++NodesCombined;
 
+// Invalidate cached info.
+ChainsWithoutMergeableStores.clear();
+
 // If we get back the same node we passed in, rather than a new node or
 // zero, we know that the node must have defined multiple values and
 // CombineTo was used.  Since CombineTo takes care of the worklist
@@ -20372,15 +20379,15 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
   return true;
 }
 
-void DAGCombiner::getStoreMergeCandidates(
-StoreSDNode *St, SmallVectorImpl &StoreNodes,
-SDNode *&RootNode) {
+SDNode *
+DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
+ SmallVectorImpl &StoreNodes) {
   // This holds the base pointer, index, and the offset in bytes from the base
   // pointer. We must have a base and an offset. Do not handle stores to undef
   // base pointers.
   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
-return;
+return nullptr;
 
   SDValue Val = peekThroughBitcasts(St->getValue());
   StoreSource StoreSrc = getStoreSource(Val);
@@ -20396,14 +20403,14 @@ void DAGCombiner::getStoreMergeCandidates(
 LoadVT = Ld->getMemoryVT();
 // Load and store should be the same type.
 if (MemVT != LoadVT)
-  return;
+  return nullptr;
 // Loads must only have one use.
 if (!Ld->hasNUsesOfValue(1, 0))
-  return;
+  return nullptr;
 // The memory operands must not be volatile/indexed/atomic.
 // TODO: May be able to relax for unordered atomics (see D66309)
 if (!Ld->isSimple() || Ld->isIndexed())
-  return;
+  return nullptr;
   }
   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
 int64_t &Offset) -> bool {
@@ -20471,6 +20478,27 @@ void DAGCombiner::getStoreMergeCandidates(
 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
   };
 
+  // We are looking for a root node which is an ancestor to all mergable
+  // stores. We search up through a load, to our root and then down
+  // through all children. For instance we will find Store{1,2,3} if
+  // St is Store1, Store2. or Store3 where the root is not a load
+  // which always true for nonvolatile ops. TODO: Expand
+  // the search to find all valid candidates through multiple layers of loads.
+  //
+  // Root
+  // |-------|-------|
+  // Load    Load    Store3
+  // |       |
+  // Store1   Store2
+  //
+  // FIXME: We should be able to climb and
+  // descend TokenFactors to find candidates as well.
+
+  S

[llvm-branch-commits] [llvm] release/19.x: [bolt][tests] Skip tests that use perf when perf counters are unavailable (#107892) (PR #109072)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109072

>From de7ee2e3ae893d61c0fa1a601e84ed538ec85e70 Mon Sep 17 00:00:00 2001
From: Tom Stellard 
Date: Tue, 17 Sep 2024 17:07:35 -0700
Subject: [PATCH] [bolt][tests] Skip tests that use perf when perf counters are
 unavailable (#107892)

On the GitHub Action runners, perf always fails with the error below,
so we need to skip the perf tests on platforms like this that have
limited access to the perf counters.

```
Access to performance monitoring and observability operations is limited.
Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open
access to performance monitoring and observability operations for processes
without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.
More information can be found at 'Perf events and tool security' document:
https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html
perf_event_paranoid setting is 4:
  -1: Allow use of (almost) all events by all users
  Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
>= 0: Disallow raw and ftrace function tracepoint access
>= 1: Disallow CPU event access
>= 2: Disallow kernel profiling
To make the adjusted perf_event_paranoid setting permanent preserve it
in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = )
```

(cherry picked from commit 773353b20a49bfa0dab608d415c1b4734d037fce)
---
 bolt/test/perf2bolt/lit.local.cfg | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bolt/test/perf2bolt/lit.local.cfg 
b/bolt/test/perf2bolt/lit.local.cfg
index 4ee9ad08cc78a0..0fecf913aa98b8 100644
--- a/bolt/test/perf2bolt/lit.local.cfg
+++ b/bolt/test/perf2bolt/lit.local.cfg
@@ -1,4 +1,5 @@
 import shutil
+import subprocess
 
-if shutil.which("perf") is not None:
-config.available_features.add("perf")
\ No newline at end of file
+if shutil.which("perf") is not None and subprocess.run(["perf", "record", 
"-e", "cycles:u", "-o", "/dev/null", "--", "perf", "--version"], 
capture_output=True).returncode == 0:
+config.available_features.add("perf")

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [bolt][tests] Skip tests that use perf when perf counters are unavailable (#107892) (PR #109072)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru closed https://github.com/llvm/llvm-project/pull/109072
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [ORC] Remove EDU from dependants list of dependencies before destroying. (PR #109355)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Hi, since we are wrapping up this next release we are very strict with the 
fixes we pick at this point. Can you please respond to the following questions 
to help me understand if this has to be included in the final release or not.

Is this PR a fix for a regression or a critical issue?

What is the risk of accepting this into the release branch?

What is the risk of NOT accepting this into the release branch?



https://github.com/llvm/llvm-project/pull/109355
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Adjust the version of __cpp_lib_ranges in C++20 mode (PR #109324)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Many CI failures here as well. Can you check @ldionne ?

https://github.com/llvm/llvm-project/pull/109324
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoopPeel] Fix LCSSA phi node invalidation (PR #109624)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

Is this safe enough to merge @nikic ? any risks?

https://github.com/llvm/llvm-project/pull/109624
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [X86][APX] Fix wrong encoding of promoted KMOV instructions due to missing NoCD8 (#109579) (PR #109635)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

tru wrote:

@RKSimon fine and safe to backport?

https://github.com/llvm/llvm-project/pull/109635
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [Clang][Concepts] Fix the constraint equivalence checking involving parameter packs (#102131) (PR #106043)

2024-09-23 Thread Tobias Hieta via llvm-branch-commits

https://github.com/tru updated https://github.com/llvm/llvm-project/pull/106043

>From 3e512ba17d90d5ab5382bae61a3e5762d2e3f42a Mon Sep 17 00:00:00 2001
From: Younan Zhang 
Date: Mon, 26 Aug 2024 14:30:26 +0800
Subject: [PATCH] [Clang][Concepts] Fix the constraint equivalence checking
 involving parameter packs (#102131)

We established an instantiation scope in order for constraint
equivalence checking to properly map the uninstantiated parameters.

That mechanism mapped even packs to themselves. Consequently, parameter
packs e.g. appearing in a function call, were not expanded. So they
would end up becoming `SubstTemplateTypeParmPackType`s that circularly
depend on the canonical declaration of the function template, which is
not yet determined, hence the spurious error.

No release note as I plan to backport it to 19.

Fixes https://github.com/llvm/llvm-project/issues/101735

-

Co-authored-by: cor3ntin 
(cherry picked from commit e6974daa7bc100c8b88057d50f3ec3eca7282243)
---
 clang/lib/Sema/SemaConcept.cpp| 26 +--
 .../SemaTemplate/concepts-out-of-line-def.cpp | 23 
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index c34d32002b5ad7..244f6ef2f53faa 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -969,8 +969,30 @@ static const Expr 
*SubstituteConstraintExpressionWithoutSatisfaction(
   // equivalence.
   LocalInstantiationScope ScopeForParameters(S);
   if (auto *FD = DeclInfo.getDecl()->getAsFunction())
-for (auto *PVD : FD->parameters())
-  ScopeForParameters.InstantiatedLocal(PVD, PVD);
+for (auto *PVD : FD->parameters()) {
+  if (!PVD->isParameterPack()) {
+ScopeForParameters.InstantiatedLocal(PVD, PVD);
+continue;
+  }
+  // This is hacky: we're mapping the parameter pack to a size-of-1 
argument
+  // to avoid building SubstTemplateTypeParmPackTypes for
+  // PackExpansionTypes. The SubstTemplateTypeParmPackType node would
+  // otherwise reference the AssociatedDecl of the template arguments, 
which
+  // is, in this case, the template declaration.
+  //
+  // However, as we are in the process of comparing potential
+  // re-declarations, the canonical declaration is the declaration itself 
at
+  // this point. So if we didn't expand these packs, we would end up with 
an
+  // incorrect profile difference because we will be profiling the
+  // canonical types!
+  //
+  // FIXME: Improve the "no-transform" machinery in FindInstantiatedDecl so
+  // that we can eliminate the Scope in the cases where the declarations 
are
+  // not necessarily instantiated. It would also benefit the noexcept
+  // specifier comparison.
+  ScopeForParameters.MakeInstantiatedLocalArgPack(PVD);
+  ScopeForParameters.InstantiatedLocalPackArg(PVD, PVD);
+}
 
   std::optional ThisScope;
 
diff --git a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp 
b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
index 0142efcdc3ee86..333187b0d74ad6 100644
--- a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
+++ b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp
@@ -599,3 +599,26 @@ template 
 unsigned long DerivedCollection::index() {}
 
 } // namespace GH72557
+
+namespace GH101735 {
+
+template 
+concept True = true;
+
+template 
+class A {
+  template 
+  void method(Ts&... ts)
+requires requires (T t) {
+  { t.method(static_cast(ts)...) } -> True;
+};
+};
+
+template 
+template 
+void A::method(Ts&... ts)
+  requires requires (T t) {
+{ t.method(static_cast(ts)...) } -> True;
+  } {}
+
+}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/19.x: [clang-format] Reimplement InsertNewlineAtEOF (#108513) (PR #109170)

2024-09-23 Thread via llvm-branch-commits

github-actions[bot] wrote:

@owenca (or anyone else). If you would like to add a note about this fix in the 
release notes (completely optional). Please reply to this comment with a one or 
two sentence description of the fix.  When you are done, please add the 
release:note label to this PR. 

https://github.com/llvm/llvm-project/pull/109170
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [LoongArch] Eliminate the redundant sign extension of division (#107971) (PR #109125)

2024-09-23 Thread Lu Weining via llvm-branch-commits

https://github.com/SixWeining approved this pull request.

lgtm

https://github.com/llvm/llvm-project/pull/109125
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Add noalias.addrspace metadata when autoupgrading atomic intrinsics (PR #102599)

2024-09-23 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/102599

>From 1fb6376c0eaa07d8115760a41a59ef1eb989c7db Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 9 Aug 2024 14:51:41 +0400
Subject: [PATCH] AMDGPU: Add noalias.addrspace metadata when autoupgrading
 atomic intrinsics

This will be needed to continue generating the raw instruction in the flat case.
---
 llvm/lib/IR/AutoUpgrade.cpp| 13 -
 llvm/test/Bitcode/amdgcn-atomic.ll | 45 --
 2 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 3390d651d6c693..624af3c07c170e 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -34,9 +34,11 @@
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
+#include "llvm/Support/AMDGPUAddrSpace.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
@@ -4216,13 +4218,22 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
   AtomicRMWInst *RMW =
   Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
 
-  if (PtrTy->getAddressSpace() != 3) {
+  unsigned AddrSpace = PtrTy->getAddressSpace();
+  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
   RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
   }
 
+  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
+MDBuilder MDB(F->getContext());
+MDNode *RangeNotPrivate =
+MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS),
+APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
+RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
+  }
+
   if (IsVolatile)
 RMW->setVolatile(true);
 
diff --git a/llvm/test/Bitcode/amdgcn-atomic.ll b/llvm/test/Bitcode/amdgcn-atomic.ll
index d642372799f56b..87ca1e3a617ed9 100644
--- a/llvm/test/Bitcode/amdgcn-atomic.ll
+++ b/llvm/test/Bitcode/amdgcn-atomic.ll
@@ -2,10 +2,10 @@
 
 
 define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) 
%ptr3) {
-  ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, 
i32 0, i1 false)
 
-  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, 
i32 43, i32 0, i32 0, i1 false)
 
   ; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 
syncscope("agent") seq_cst, align 4{{$}}
@@ -26,10 +26,10 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, 
ptr addrspace(3) %ptr
 }
 
 define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) 
%ptr3) {
-  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, 
align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, 
i32 0, i1 false)
 
-  ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 
syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !1
   %result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, 
i32 43, i32 0, i32 0, i1 false)
 
   ; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 
syncscope("agent") seq_cst, align 4{{$}}
@@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, 
ptr addrspace(3) %ptr
 
 ; Test some invalid ordering handling
 define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) 
%ptr3) {
-  ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") 
seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
+  ; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") 
seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1{{$}}
   %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 
-1, i32 0, i1 true)
 
-  ; CHECK: