[llvm-branch-commits] [nsan] Fix style issue (PR #96142)

2024-06-20 Thread Alexander Shaposhnikov via llvm-branch-commits

alexander-shaposhnikov wrote:

thanks!

https://github.com/llvm/llvm-project/pull/96142
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [nsan] Fix style issue (PR #96142)

2024-06-20 Thread Alexander Shaposhnikov via llvm-branch-commits

https://github.com/alexander-shaposhnikov approved this pull request.


https://github.com/llvm/llvm-project/pull/96142
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [nsan] Fix style issue (PR #96142)

2024-06-20 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/96142

>From 54357a77702361abf1044d1ee5d0dee3e4fa407c Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 20 Jun 2024 00:23:01 -0700
Subject: [PATCH] more

Created using spr 1.3.5-bogner
---
 compiler-rt/lib/nsan/nsan.cpp  |  59 ++--
 compiler-rt/lib/nsan/nsan_flags.cpp|   2 -
 compiler-rt/lib/nsan/nsan_stats.cpp| 107 ++---
 compiler-rt/lib/nsan/nsan_stats.h  |  43 +
 compiler-rt/lib/nsan/nsan_suppressions.cpp |  48 +
 5 files changed, 124 insertions(+), 135 deletions(-)

diff --git a/compiler-rt/lib/nsan/nsan.cpp b/compiler-rt/lib/nsan/nsan.cpp
index ece1130f73d14..fd5390e20a029 100644
--- a/compiler-rt/lib/nsan/nsan.cpp
+++ b/compiler-rt/lib/nsan/nsan.cpp
@@ -71,7 +71,6 @@ __nsan_set_value_unknown(const u8 *addr, uptr size) {
   internal_memset((void *)getShadowTypeAddrFor(addr), 0, size);
 }
 
-namespace __nsan {
 
 const char *FTInfo<float>::kCppTypeName = "float";
 const char *FTInfo<double>::kCppTypeName = "double";
@@ -177,8 +176,6 @@ template <typename T> T max(T a, T b) { return a < b ? b : a; }
 
 } // end anonymous namespace
 
-} // end namespace __nsan
-
 void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp,
  void *context,
  bool request_fast,
@@ -189,7 +186,7 @@ void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp,
 
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __nsan_print_accumulated_stats() {
   if (nsan_stats)
-nsan_stats->print();
+nsan_stats->Print();
 }
 
 static void NsanAtexit() {
@@ -228,18 +225,18 @@ __nsan_get_shadow_ptr_for_longdouble_store(u8 *store_addr, uptr n) {
   return getShadowPtrForStore(store_addr, n);
 }
 
-template <typename FT> static bool isValidShadowType(const u8 *shadow_type) {
+template <typename FT> static bool IsValidShadowType(const u8 *shadow_type) {
   return __builtin_memcmp(shadow_type, FTInfo<FT>::kTypePattern, sizeof(FT)) ==
  0;
 }
 
-template <int kSize, typename T> static bool isZero(const T *ptr) {
+template <int kSize, typename T> static bool IsZero(const T *ptr) {
   constexpr const char kZeros[kSize] = {}; // Zero initialized.
   return __builtin_memcmp(ptr, kZeros, kSize) == 0;
 }
 
-template <typename FT> static bool isUnknownShadowType(const u8 *shadow_type) {
-  return isZero<sizeof(FTInfo<FT>::kTypePattern)>(shadow_type);
+template <typename FT> static bool IsUnknownShadowType(const u8 *shadow_type) {
+  return IsZero<sizeof(FTInfo<FT>::kTypePattern)>(shadow_type);
 }
 
 // The three folowing functions check that the address stores a complete
@@ -249,21 +246,21 @@ template <typename FT>
 static const u8 *getShadowPtrForLoad(const u8 *load_addr, uptr n) {
   const u8 *const shadow_type = getShadowTypeAddrFor(load_addr);
   for (uptr i = 0; i < n; ++i) {
-if (!isValidShadowType<FT>(shadow_type + i * sizeof(FT))) {
+if (!IsValidShadowType<FT>(shadow_type + i * sizeof(FT))) {
   // If loadtracking stats are enabled, log loads with invalid types
   // (tampered with through type punning).
   if (flags().enable_loadtracking_stats) {
-if (isUnknownShadowType<FT>(shadow_type + i * sizeof(FT))) {
+if (IsUnknownShadowType<FT>(shadow_type + i * sizeof(FT))) {
   // Warn only if the value is non-zero. Zero is special because
   // applications typically initialize large buffers to zero in an
   // untyped way.
-  if (!isZero<sizeof(FT)>(load_addr)) {
+  if (!IsZero<sizeof(FT)>(load_addr)) {
 GET_CALLER_PC_BP;
-nsan_stats->addUnknownLoadTrackingEvent(pc, bp);
+nsan_stats->AddUnknownLoadTrackingEvent(pc, bp);
   }
 } else {
   GET_CALLER_PC_BP;
-  nsan_stats->addInvalidLoadTrackingEvent(pc, bp);
+  nsan_stats->AddInvalidLoadTrackingEvent(pc, bp);
 }
   }
   return nullptr;
@@ -442,7 +439,7 @@ int32_t checkFT(const FT value, ShadowFT Shadow, CheckTypeT CheckType,
 // want to avoid having to move the computation of `largest` before the
 // absolute value check when this branch is not taken.
 const InternalFT largest = max(ftAbs(check_value), ftAbs(check_shadow));
-nsan_stats->addCheck(CheckType, pc, bp, abs_err / largest);
+nsan_stats->AddCheck(CheckType, pc, bp, abs_err / largest);
   }
 
   // Note: writing the comparison that way ensures that when `abs_err` is Nan
@@ -534,7 +531,7 @@ int32_t checkFT(const FT value, ShadowFT Shadow, CheckTypeT CheckType,
 
   if (flags().enable_warning_stats) {
 GET_CALLER_PC_BP;
-nsan_stats->addWarning(CheckType, pc, bp, abs_err / largest);
+nsan_stats->AddWarning(CheckType, pc, bp, abs_err / largest);
   }
 
   if (flags().halt_on_error) {
@@ -565,10 +562,10 @@ __nsan_internal_check_longdouble_q(long double value, __float128 shadow,
   return checkFT(value, shadow, static_cast<CheckTypeT>(check_type),
  check_arg);
 }
 
-static const char *getTruthValueName(bool v) { return v ? "true" : "false"; }
+static const char *GetTruthValueName(bool v) { return v ? "true" : "false"; }

[llvm-branch-commits] [compiler-rt] [nsan] Fix style issue (PR #96142)

2024-06-20 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/96142

>From 54357a77702361abf1044d1ee5d0dee3e4fa407c Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 20 Jun 2024 00:23:01 -0700
Subject: [PATCH 1/2] more

Created using spr 1.3.5-bogner

[llvm-branch-commits] [compiler-rt] [nsan] Fix style issue (PR #96142)

2024-06-20 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/96142

>From 54357a77702361abf1044d1ee5d0dee3e4fa407c Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 20 Jun 2024 00:23:01 -0700
Subject: [PATCH 1/3] more

Created using spr 1.3.5-bogner

[llvm-branch-commits] [compiler-rt] [nsan] Fix style issue (PR #96142)

2024-06-20 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/96142

>From 54357a77702361abf1044d1ee5d0dee3e4fa407c Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 20 Jun 2024 00:23:01 -0700
Subject: [PATCH 1/3] more

Created using spr 1.3.5-bogner

[llvm-branch-commits] [compiler-rt] [nsan] Fix style issue (PR #96142)

2024-06-20 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/96142

>From 54357a77702361abf1044d1ee5d0dee3e4fa407c Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 20 Jun 2024 00:23:01 -0700
Subject: [PATCH 1/3] more

Created using spr 1.3.5-bogner

[llvm-branch-commits] [compiler-rt] [nsan] Fix style issue (PR #96142)

2024-06-20 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/96142

>From 54357a77702361abf1044d1ee5d0dee3e4fa407c Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Thu, 20 Jun 2024 00:23:01 -0700
Subject: [PATCH 1/4] more

Created using spr 1.3.5-bogner

[llvm-branch-commits] [clang] [clang] Define ptrauth_sign_constant builtin. (PR #93904)

2024-06-20 Thread Daniil Kovalev via llvm-branch-commits

https://github.com/kovdan01 edited 
https://github.com/llvm/llvm-project/pull/93904
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [clang] Implement function pointer signing and authenticated function calls (PR #93906)

2024-06-20 Thread Daniil Kovalev via llvm-branch-commits


@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios -fptrauth-calls -fptrauth-intrinsics -emit-llvm %s  -o - | FileCheck %s

kovdan01 wrote:

Thanks. I'm OK with merging this "as is" and enhancing/re-organizing tests as a 
separate PR.
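
For context on what the quoted RUN line exercises: roughly speaking, with
-fptrauth-calls, taking a function's address yields a signed function pointer
and an indirect call through it is authenticated at the call site, while
-fptrauth-intrinsics additionally exposes the __builtin_ptrauth_* intrinsics.
A hedged sketch of the kind of source such a codegen test might compile
(illustration only, not the actual test contents from this PR; callback_t,
add_one, pick, and invoke are invented names):

```cpp
// Illustration only: not the test from the PR.
typedef int (*callback_t)(int);

static int add_one(int x) { return x + 1; }

// Under -fptrauth-calls, taking the address of a function is expected to
// produce a signed function pointer...
callback_t pick(void) { return &add_one; }

// ...and the indirect call below is expected to be emitted as an
// authenticated call rather than a raw indirect branch.
int invoke(callback_t cb, int value) { return cb(value); }
```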

https://github.com/llvm/llvm-project/pull/93906
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [clang] Implement function pointer signing and authenticated function calls (PR #93906)

2024-06-20 Thread Daniil Kovalev via llvm-branch-commits

https://github.com/kovdan01 approved this pull request.

LGTM, but this needs a rebase - the base branch was updated, and now the PR 
contains unrelated changes.

https://github.com/llvm/llvm-project/pull/93906
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread Christudasan Devadasan via llvm-branch-commits

cdevadas wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/96162).
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#96163** https://app.graphite.dev/github/pr/llvm/llvm-project/96163
* **#96162** https://app.graphite.dev/github/pr/llvm/llvm-project/96162 👈
* **#96161** https://app.graphite.dev/github/pr/llvm/llvm-project/96161
* `main`

This stack of pull requests is managed by Graphite. Learn more about
stacking: https://stacking.dev/



https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Codegen support for constrained multi-dword sloads (PR #96163)

2024-06-20 Thread Christudasan Devadasan via llvm-branch-commits

cdevadas wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite (https://app.graphite.dev/github/pr/llvm/llvm-project/96163).
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#96163** https://app.graphite.dev/github/pr/llvm/llvm-project/96163 👈
* **#96162** https://app.graphite.dev/github/pr/llvm/llvm-project/96162
* **#96161** https://app.graphite.dev/github/pr/llvm/llvm-project/96161
* `main`

This stack of pull requests is managed by Graphite. Learn more about
stacking: https://stacking.dev/



https://github.com/llvm/llvm-project/pull/96163
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Codegen support for constrained multi-dword sloads (PR #96163)

2024-06-20 Thread Christudasan Devadasan via llvm-branch-commits

https://github.com/cdevadas ready_for_review 
https://github.com/llvm/llvm-project/pull/96163
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Codegen support for constrained multi-dword sloads (PR #96163)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Christudasan Devadasan (cdevadas)


Changes

For targets that support the XNACK replay feature (gfx8+),
multi-dword scalar loads shouldn't clobber any register that
holds the source address. The constrained versions of the scalar
loads have the early-clobber flag attached to the dst operand
to restrict RA from re-allocating any of the source registers for
the dst operand.
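
For readers less familiar with the early-clobber constraint, the same idea
exists in GCC/Clang extended inline asm: a '&' modifier on an output operand
tells the register allocator that the output is written before all inputs have
been consumed, so the output must not share a register with any input. The
constrained sload variants need the analogous flag so that an XNACK replay can
still find the source-address registers intact. A minimal sketch of the asm
analogy (not code from this patch; pack_halves and its asm body are invented
for illustration):

```cpp
#include <cstdint>
#include <cstdio>

// dst is written by the first instruction, before the second input is read,
// so it is marked early-clobber ('&'): the register allocator must not give
// dst the same register as hi or lo.
static inline std::uint64_t pack_halves(std::uint32_t hi, std::uint32_t lo) {
  std::uint64_t dst;
#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
  __asm__("movl %1, %k0\n\t" // dst = hi  (dst is written here...)
          "shlq $32, %0\n\t" // dst <<= 32
          "orl  %2, %k0"     // dst |= lo (...before lo is consumed)
          : "=&r"(dst)       // '&' marks the output as early-clobber
          : "r"(hi), "r"(lo)
          : "cc");
#else
  dst = (static_cast<std::uint64_t>(hi) << 32) | lo; // portable fallback
#endif
  return dst;
}

int main() {
  std::printf("0x%llx\n",
              static_cast<unsigned long long>(pack_halves(0x1234u, 0x5678u)));
}
```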

---

Patch is 7.42 MiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96163.diff


265 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SMInstructions.td (+99-17) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll (+10-10) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll 
(+122-122) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll (+117-117) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir 
(+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir 
(+36-36) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll 
(+215-190) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll 
(+75-128) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll 
(+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll 
(+21-21) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll 
(+90-90) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll 
(+64-64) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll (+15-5) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll (+33-33) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll (+82-82) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll (+139-139) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll (+263-264) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll 
(+146-146) 
- (modified) llvm/test/CodeGen/AMDGPU/add.ll (+273-272) 
- (modified) llvm/test/CodeGen/AMDGPU/add.v2i16.ll (+134-134) 
- (modified) llvm/test/CodeGen/AMDGPU/amd.endpgm.ll (+8-8) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll (+614-611) 
- (modified) llvm/test/CodeGen/AMDGPU/and.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/anyext.ll (+11-11) 
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll (+271-253) 
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll 
(+1000-1005) 
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll 
(+1095-1060) 
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll 
(+236-220) 
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll 
(+272-254) 
- (modified) llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll (+28-28) 
- (modified) llvm/test/CodeGen/AMDGPU/bfe-combine.ll (+28-28) 
- (modified) llvm/test/CodeGen/AMDGPU/bfe-patterns.ll (+36-36) 
- (modified) llvm/test/CodeGen/AMDGPU/bfi_int.ll (+68-68) 
- (modified) llvm/test/CodeGen/AMDGPU/bfm.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/bitreverse.ll (+70-70) 
- (modified) llvm/test/CodeGen/AMDGPU/br_cc.f16.ll (+37-37) 
- (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll 
(+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/bswap.ll (+78-78) 
- (modified) llvm/test/CodeGen/AMDGPU/build_vector.ll (+22-22) 
- (modified) llvm/test/CodeGen/AMDGPU/calling-conventions.ll (+162-162) 
- (modified) llvm/test/CodeGen/AMDGPU/carryout-selection.ll (+426-424) 
- (modified) llvm/test/CodeGen/AMDGPU/clamp-modifier.ll (+209-209) 
- (modified) llvm/test/CodeGen/AMDGPU/clamp.ll (+667-667) 
- (modified) llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll (+15-15) 
- (modified) llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll (+11-11) 
- (modified) llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll (+127-130) 
- (modified) llvm/test/CodeGen/AMDGPU/copy_to_scc.ll (+11-11) 
- (modified) llvm/test/CodeGen/AMDGPU/ctlz.ll (+27-27) 
- (modified) llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (+43-43) 
- (modified) llvm/test/CodeGen/AMDGPU/ctpop16.ll (+26-26) 
- (modified) llvm/test/CodeGen/AMDGPU/ctpop64.ll (+38-38) 
- (modified) llvm/test/CodeGen/AMDGPU/cttz.ll (+16-16) 
- (modified) llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll (+24-24) 
- (modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+14-14) 
- (modified) llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll (+154-151) 
- (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll (+41-41) 
- (modified) 

[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread Christudasan Devadasan via llvm-branch-commits

https://github.com/cdevadas ready_for_review 
https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-globalisel

Author: Christudasan Devadasan (cdevadas)


Changes

Consider the constrained multi-dword loads when merging
individual loads into a single multi-dword load.

---

Patch is 1023.60 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96162.diff


116 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+1) 
- (modified) llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp (+63-16) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll (+84-85) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll (+6-6) 
- (modified) 
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll 
(+51-51) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll 
(+50-50) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll 
(+42-42) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll (+102-102) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll (+65-65) 
- (modified) llvm/test/CodeGen/AMDGPU/add.v2i16.ll (+21-21) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll (+204-204) 
- (modified) llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll (+24-24) 
- (modified) llvm/test/CodeGen/AMDGPU/bfe-patterns.ll (+20-20) 
- (modified) llvm/test/CodeGen/AMDGPU/bfm.ll (+5-5) 
- (modified) llvm/test/CodeGen/AMDGPU/bitreverse.ll (+157-157) 
- (modified) llvm/test/CodeGen/AMDGPU/build_vector.ll (+25-25) 
- (modified) llvm/test/CodeGen/AMDGPU/calling-conventions.ll (+25-25) 
- (modified) llvm/test/CodeGen/AMDGPU/cluster_stores.ll (+60-60) 
- (modified) llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll (+10-10) 
- (modified) llvm/test/CodeGen/AMDGPU/ctlz.ll (+227-227) 
- (modified) llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (+223-223) 
- (modified) llvm/test/CodeGen/AMDGPU/ctpop16.ll (+52-48) 
- (modified) llvm/test/CodeGen/AMDGPU/ctpop64.ll (+24-24) 
- (modified) llvm/test/CodeGen/AMDGPU/cttz.ll (+185-185) 
- (modified) llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll (+212-212) 
- (modified) llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll (+331-334) 
- (modified) llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll (+24-24) 
- (modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+8-8) 
- (modified) llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll (+25-25) 
- (modified) llvm/test/CodeGen/AMDGPU/fabs.f16.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/fabs.ll (+17-17) 
- (modified) llvm/test/CodeGen/AMDGPU/fcanonicalize.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll (+31-32) 
- (modified) llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll (+33-33) 
- (modified) llvm/test/CodeGen/AMDGPU/fdiv.ll (+143-143) 
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics.ll (+20-20) 
- (modified) llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll (+56-56) 
- (modified) llvm/test/CodeGen/AMDGPU/fma-combine.ll (+88-88) 
- (modified) llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll (+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll (+240-240) 
- (modified) llvm/test/CodeGen/AMDGPU/fnearbyint.ll (+9-9) 
- (modified) llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll (+23-23) 
- (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll (+6-6) 
- (modified) llvm/test/CodeGen/AMDGPU/fneg-fabs.ll (+17-17) 
- (modified) llvm/test/CodeGen/AMDGPU/fneg.ll (+13-13) 
- (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll (+26-26) 
- (modified) llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll (+22-22) 
- (modified) llvm/test/CodeGen/AMDGPU/fp-classify.ll (+13-13) 
- (modified) llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll (+33-33) 
- (modified) llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-ptr-atomics.ll (+46-46) 
- (modified) llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll (+20-20) 
- (modified) llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll (+20-20) 
- (modified) llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll (+20-20) 
- (modified) llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll (+24-24) 
- (modified) llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-ptr-atomics.ll 
(+38-38) 
- (modified) llvm/test/CodeGen/AMDGPU/fp_to_sint.ll (+19-19) 
- (modified) llvm/test/CodeGen/AMDGPU/fp_to_uint.ll (+19-19) 
- (modified) llvm/test/CodeGen/AMDGPU/fshl.ll (+44-44) 
- (modified) llvm/test/CodeGen/AMDGPU/fshr.ll (+20-20) 
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics.ll (+24-24) 
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll (+98-98) 
- (modified) llvm/test/CodeGen/AMDGPU/half.ll (+41-41) 
- (modified) llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll (+88-88) 
- (modified) llvm/test/CodeGen/AMDGPU/insert_waitcnt_for_precise_memory.ll 
(+124-124) 
- (modified) llvm/test/CodeGen/AMDGPU/kernel-args.ll (+53-53) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll (+15-15) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.a

[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread Jay Foad via llvm-branch-commits

jayfoad wrote:

This looks like it is affecting codegen even when xnack is disabled? That 
should not happen.

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] Users/hsiangkai/winograd ops transform (PR #96177)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai created 
https://github.com/llvm/llvm-project/pull/96177

None

>From 276ed8981c5243696da3bf233a777e1b84f11131 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Mon, 17 Jun 2024 11:24:07 +0100
Subject: [PATCH 1/2] [mlir][linalg] Implement Conv2D using Winograd Conv2D
 algorithm

Define high-level Winograd operators and convert conv_2d_nhwc_fhwc into
Winograd operators. According to the Winograd Conv2D algorithm, we need
three transform operators for input, filter, and output transformation.

The formula of the Winograd Conv2D algorithm is

Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A

filter transform: G x g x G^T
input transform: B^T x d x B
output transform: A^T x y x A

The implementation is based on the paper, Fast Algorithms for
Convolutional Neural Networks. (https://arxiv.org/abs/1509.09308)
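
To make the three transforms concrete, below is a minimal, self-contained 1-D
F(2, 3) sketch in C++ (illustration only, not code from this patch). The input
and filter values are arbitrary, and B^T, G, and A^T are the standard F(2, 3)
constants from the Lavin/Gray paper; the 2-D case applies the same transforms
on both sides of each tile.

```cpp
#include <array>
#include <cstdio>

int main() {
  // Standard 1-D F(2, 3) transform matrices (m = 2, r = 3, tile = m + r - 1 = 4).
  const double BT[4][4] = {
      {1, 0, -1, 0}, {0, 1, 1, 0}, {0, -1, 1, 0}, {0, 1, 0, -1}};
  const double G[4][3] = {{1, 0, 0}, {0.5, 0.5, 0.5}, {0.5, -0.5, 0.5}, {0, 0, 1}};
  const double AT[2][4] = {{1, 1, 1, 0}, {0, 1, -1, -1}};

  const std::array<double, 4> d = {1, 2, 3, 4}; // input tile (arbitrary)
  const std::array<double, 3> g = {0.5, 1, -1}; // filter (arbitrary)

  std::array<double, 4> U{}, V{}, M{};
  for (int i = 0; i < 4; ++i) // filter transform: U = G x g
    for (int j = 0; j < 3; ++j)
      U[i] += G[i][j] * g[j];
  for (int i = 0; i < 4; ++i) // input transform: V = B^T x d
    for (int j = 0; j < 4; ++j)
      V[i] += BT[i][j] * d[j];
  for (int i = 0; i < 4; ++i) // element-wise product
    M[i] = U[i] * V[i];

  std::array<double, 2> y{};
  for (int i = 0; i < 2; ++i) // output transform: y = A^T x M
    for (int j = 0; j < 4; ++j)
      y[i] += AT[i][j] * M[j];

  // Reference: direct 1-D convolution (correlation) of d with g.
  for (int i = 0; i < 2; ++i) {
    double ref = d[i] * g[0] + d[i + 1] * g[1] + d[i + 2] * g[2];
    std::printf("y[%d] = %g (direct = %g)\n", i, y[i], ref);
  }
}
```

Both columns should agree (here -0.5 and 0): the convolution is replaced by
cheap transforms plus an element-wise multiply, which is what the three linalg
ops introduced by this patch represent at a high level.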
---
 .../mlir/Dialect/Linalg/IR/LinalgOps.td   | 114 +++
 .../Dialect/Linalg/Transforms/Transforms.h|   4 +
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp  |  78 +
 .../Dialect/Linalg/Transforms/CMakeLists.txt  |   1 +
 .../Linalg/Transforms/WinogradConv2D.cpp  | 321 ++
 mlir/test/Dialect/Linalg/winograd-conv2d.mlir | 248 ++
 .../Dialect/Linalg/TestLinalgTransforms.cpp   |  13 +
 7 files changed, 779 insertions(+)
 create mode 100644 mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp
 create mode 100644 mlir/test/Dialect/Linalg/winograd-conv2d.mlir

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 64c538367267d..de1097b6ac27b 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,4 +154,118 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+  let summary = "Winograd filter transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of filter
+transformation (G x g x G^T) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$filter,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $filter `:` type($filter) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+  let summary = "Winograd input transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of input
+transformation (B^T x d x B) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $input `:` type($input) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+  let summary = "Winograd output transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r

[llvm-branch-commits] [mlir] Users/hsiangkai/winograd ops transform (PR #96177)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Hsiangkai Wang (Hsiangkai)


Changes



---

Patch is 57.69 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96177.diff


10 Files Affected:

- (modified) mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td (+114) 
- (modified) 
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td (+51) 
- (modified) mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h (+11) 
- (modified) mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp (+78) 
- (modified) mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp (+25) 
- (modified) mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt (+1) 
- (added) mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp (+327) 
- (added) mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir (+88) 
- (added) mlir/test/Dialect/Linalg/winograd-conv2d.mlir (+248) 
- (modified) mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp (+13) 


```diff
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 64c538367267d..de1097b6ac27b 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,4 +154,118 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+  let summary = "Winograd filter transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of filter
+transformation (G x g x G^T) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$filter,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $filter `:` type($filter) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+  let summary = "Winograd input transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of input
+transformation (B^T x d x B) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $input `:` type($input) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+  let summary = "Winograd output transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of output
+transformation (A^T x y x A) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$value,
+

[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96178)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Hsiangkai Wang (Hsiangkai)


Changes

Convert Linalg winograd_filter_transform, winograd_input_transform, and
winograd_output_transform into nested loops with matrix multiplication
with constant transform matrices.

Support several configurations of Winograd Conv2D, including F(2, 3),
F(4, 3) and F(2, 5). These configurations show that the implementation
can support different kernel sizes (3 and 5) and different output sizes
(2 and 4). Besides the symmetric kernel sizes 3x3 and 5x5, this patch also
supports 1x3, 3x1, 1x5, and 5x1 kernels.

The implementation is based on the paper, Fast Algorithms for
Convolutional Neural Networks. (https://arxiv.org/abs/1509.09308)


---

Patch is 99.59 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96178.diff


11 Files Affected:

- (modified) mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td (+114) 
- (modified) 
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td (+51) 
- (modified) mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h (+14) 
- (modified) mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp (+78) 
- (modified) mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp (+25) 
- (modified) mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt (+1) 
- (added) mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp (+1100) 
- (added) mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir (+88) 
- (added) mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir (+105) 
- (added) mlir/test/Dialect/Linalg/winograd-conv2d.mlir (+248) 
- (modified) mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp (+24) 


``diff
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 64c538367267d..de1097b6ac27b 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,4 +154,118 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+  let summary = "Winograd filter transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of filter
+transformation (G x g x G^T) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$filter,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $filter `:` type($filter) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+  let summary = "Winograd input transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of input
+transformation (B^T x d x B) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $input `:` type($input) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+  let summary = "Winograd output transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matr

[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96178)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir-linalg

Author: Hsiangkai Wang (Hsiangkai)


Changes

Convert Linalg winograd_filter_transform, winograd_input_transform, and
winograd_output_transform into nested loops with matrix multiplication
with constant transform matrices.

Support several configurations of Winograd Conv2D, including F(2, 3),
F(4, 3) and F(2, 5). These configurations show that the implementation
can support different kernel sizes (3 and 5) and different output sizes
(2 and 4). Besides the symmetric kernel sizes 3x3 and 5x5, this patch also
supports 1x3, 3x1, 1x5, and 5x1 kernels.

The implementation is based on the paper "Fast Algorithms for
Convolutional Neural Networks" (https://arxiv.org/abs/1509.09308).


---

Patch is 99.59 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96178.diff


11 Files Affected:

- (modified) mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td (+114) 
- (modified) 
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td (+51) 
- (modified) mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h (+14) 
- (modified) mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp (+78) 
- (modified) mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp (+25) 
- (modified) mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt (+1) 
- (added) mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp (+1100) 
- (added) mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir (+88) 
- (added) mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir (+105) 
- (added) mlir/test/Dialect/Linalg/winograd-conv2d.mlir (+248) 
- (modified) mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp (+24) 


``diff
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 64c538367267d..de1097b6ac27b 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,4 +154,118 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+  let summary = "Winograd filter transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of filter
+transformation (G x g x G^T) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$filter,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $filter `:` type($filter) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+  let summary = "Winograd input transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of input
+transformation (B^T x d x B) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $input `:` type($input) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+  let summary = "Winograd output transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+ 

[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96177)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai edited 
https://github.com/llvm/llvm-project/pull/96177
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96179)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Hsiangkai Wang (Hsiangkai)


Changes

In order to support arbitrary size input data of conv2d, implement
TilingInterface for winograd operators. Before converting winograd
operators into nested loops with matrix multiply, tile the input of
conv2d into the supported size first.

Add a transform operator structured.decompose_winograd_op to decompose
winograd operators. Before applying the transform op, use tile_using_for
to tile the input data into supported size. The test case shows how to
tile and decompose winograd operators.
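
The property that makes this pre-tiling legal is that an F(m, r) output tile
only reads an input window of m + r - 1 elements, and the windows of adjacent
output tiles overlap by r - 1. A rough 1-D NumPy sketch of that bookkeeping
(illustration only, not the TilingInterface implementation in this patch):

```python
import numpy as np

def correlate1d(d, g):
    r = len(g)
    return np.array([np.dot(d[i:i + r], g) for i in range(len(d) - r + 1)])

m, r = 2, 3                      # F(2, 3): 2 outputs per tile, 3-tap filter
g = np.random.rand(r)
d = np.random.rand(10)           # output length 8, i.e. 4 tiles of 2
out_len = len(d) - r + 1
assert out_len % m == 0          # assume the input was already tiled/padded

tiles = []
for t in range(out_len // m):
    # Each output tile of m values needs an input window of m + r - 1 values
    # starting at t * m; consecutive windows overlap by r - 1 elements.
    window = d[t * m : t * m + m + r - 1]
    tiles.append(correlate1d(window, g))   # each tile could use F(2, 3) internally
np.testing.assert_allclose(np.concatenate(tiles), correlate1d(d, g))
```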

---

Patch is 153.54 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96179.diff


12 Files Affected:

- (modified) mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td (+129) 
- (modified) 
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td (+88) 
- (modified) mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h (+59) 
- (modified) mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp (+359) 
- (modified) mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp (+52) 
- (modified) mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt (+1) 
- (added) mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp (+1118) 
- (added) mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir 
(+332) 
- (added) mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir (+88) 
- (added) mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir (+105) 
- (added) mlir/test/Dialect/Linalg/winograd-conv2d.mlir (+248) 
- (modified) mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp (+24) 


``diff
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 64c538367267d..45726d6ee2224 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,4 +154,133 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform",
+[DeclareOpInterfaceMethods]> {
+  let summary = "Winograd filter transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of filter
+transformation (G x g x G^T) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$filter,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $filter `:` type($filter) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform",
+[DeclareOpInterfaceMethods]> {
+  let summary = "Winograd input transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of input
+transformation (B^T x d x B) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $input `:` type($input) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform",
+[DeclareOpInterfaceMethods]> {
+  let summary = "Winograd output transform operator";
+  let description = [{
+Winograd Conv2D algorithm wi

[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96177)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai edited 
https://github.com/llvm/llvm-project/pull/96177
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96177)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai updated 
https://github.com/llvm/llvm-project/pull/96177

>From 276ed8981c5243696da3bf233a777e1b84f11131 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Mon, 17 Jun 2024 11:24:07 +0100
Subject: [PATCH 1/2] [mlir][linalg] Implement Conv2D using Winograd Conv2D
 algorithm

Define high level winograd operators and convert conv_2d_nhwc_fhwc into
winograd operators. According to Winograd Conv2D algorithm, we need
three transform operators for input, filter, and output transformation.

The formula of Winograd Conv2D algorithm is

Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A

filter transform: G x g x G^T
input transform: B^T x d x B
output transform: A^T x y x A

The implementation is based on the paper "Fast Algorithms for
Convolutional Neural Networks" (https://arxiv.org/abs/1509.09308).
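
As a numerical model of the data flow sketched above, the following NumPy code
applies F(2x2, 3x3) to an NHWC input with an FHWC filter and checks the result
against a direct convolution. It is an illustration only; the intermediate
layouts are chosen for readability and are not necessarily the layouts produced
by this patch.

```python
import numpy as np

BT = np.array([[1, 0, -1, 0], [0, 1, 1, 0], [0, -1, 1, 0], [0, 1, 0, -1]], dtype=float)
G = np.array([[1, 0, 0], [0.5, 0.5, 0.5], [0.5, -0.5, 0.5], [0, 0, 1]], dtype=float)
AT = np.array([[1, 1, 1, 0], [0, 1, -1, -1]], dtype=float)
m, r, alpha = 2, 3, 4

def direct_conv2d_nhwc_fhwc(x, w):
    N, H, W, C = x.shape
    F = w.shape[0]
    out = np.zeros((N, H - r + 1, W - r + 1, F))
    for n in range(N):
        for f in range(F):
            for i in range(out.shape[1]):
                for j in range(out.shape[2]):
                    out[n, i, j, f] = np.sum(x[n, i:i + r, j:j + r, :] * w[f])
    return out

def winograd_conv2d_nhwc_fhwc(x, w):
    N, H, W, C = x.shape
    F = w.shape[0]
    outH, outW = H - r + 1, W - r + 1
    assert outH % m == 0 and outW % m == 0      # pre-tiled sizes only
    U = np.einsum('ik,fklc,jl->fcij', G, w, G)  # filter transform: G x g x G^T
    out = np.zeros((N, outH, outW, F))
    for n in range(N):
        for th in range(outH // m):
            for tw in range(outW // m):
                d = x[n, th * m:th * m + alpha, tw * m:tw * m + alpha, :]
                V = np.einsum('ik,klc,jl->cij', BT, d, BT)   # input transform: B^T x d x B
                # Contract over channels in the transformed domain; batched over
                # the alpha x alpha positions this is the batched matrix multiply.
                M = np.einsum('fcij,cij->fij', U, V)
                Y = np.einsum('ai,fij,bj->fab', AT, M, AT)   # output transform: A^T x y x A
                out[n, th * m:(th + 1) * m, tw * m:(tw + 1) * m, :] = Y.transpose(1, 2, 0)
    return out

x = np.random.rand(1, 6, 6, 3)   # NHWC
w = np.random.rand(2, 3, 3, 3)   # FHWC
assert np.allclose(winograd_conv2d_nhwc_fhwc(x, w), direct_conv2d_nhwc_fhwc(x, w))
```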
---
 .../mlir/Dialect/Linalg/IR/LinalgOps.td   | 114 +++
 .../Dialect/Linalg/Transforms/Transforms.h|   4 +
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp  |  78 +
 .../Dialect/Linalg/Transforms/CMakeLists.txt  |   1 +
 .../Linalg/Transforms/WinogradConv2D.cpp  | 321 ++
 mlir/test/Dialect/Linalg/winograd-conv2d.mlir | 248 ++
 .../Dialect/Linalg/TestLinalgTransforms.cpp   |  13 +
 7 files changed, 779 insertions(+)
 create mode 100644 mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp
 create mode 100644 mlir/test/Dialect/Linalg/winograd-conv2d.mlir

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 64c538367267d..de1097b6ac27b 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,4 +154,118 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+  let summary = "Winograd filter transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of filter
+transformation (G x g x G^T) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$filter,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $filter `:` type($filter) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+  let summary = "Winograd input transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of input
+transformation (B^T x d x B) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $input `:` type($input) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+  let summary = "Winograd output transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) 

[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96177)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai updated 
https://github.com/llvm/llvm-project/pull/96177

>From 0c542404842679a5b9653a9a1049fb765245692e Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Mon, 17 Jun 2024 11:24:07 +0100
Subject: [PATCH 1/2] [mlir][linalg] Implement Conv2D using Winograd Conv2D
 algorithm

Define high level winograd operators and convert conv_2d_nhwc_fhwc into
winograd operators. According to Winograd Conv2D algorithm, we need
three transform operators for input, filter, and output transformation.

The formula of Winograd Conv2D algorithm is

Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A

filter transform: G x g x G^T
input transform: B^T x d x B
output transform: A^T x y x A

The implementation is based on the paper "Fast Algorithms for
Convolutional Neural Networks" (https://arxiv.org/abs/1509.09308).
---
 .../mlir/Dialect/Linalg/IR/LinalgOps.td   | 114 +++
 .../Dialect/Linalg/Transforms/Transforms.h|   4 +
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp  |  78 +
 .../Dialect/Linalg/Transforms/CMakeLists.txt  |   1 +
 .../Linalg/Transforms/WinogradConv2D.cpp  | 321 ++
 mlir/test/Dialect/Linalg/winograd-conv2d.mlir | 248 ++
 .../Dialect/Linalg/TestLinalgTransforms.cpp   |  13 +
 7 files changed, 779 insertions(+)
 create mode 100644 mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp
 create mode 100644 mlir/test/Dialect/Linalg/winograd-conv2d.mlir

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 64c538367267d..de1097b6ac27b 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,4 +154,118 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+  let summary = "Winograd filter transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of filter
+transformation (G x g x G^T) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$filter,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $filter `:` type($filter) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+  let summary = "Winograd input transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of input
+transformation (B^T x d x B) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $input `:` type($input) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+  let summary = "Winograd output transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) 

[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai created 
https://github.com/llvm/llvm-project/pull/96182

Add a transform operator structured.winograd_conv2d to convert
linalg.conv_2d_nhwc_fhwc to Linalg winograd operators.
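
The op descriptions in the patch below repeatedly state that the rewrite turns
Conv2D into a batched matrix multiply. A tiny NumPy sketch of why that holds;
the (tile, channel) and (channel, filter) layouts here are assumptions for
illustration, not necessarily the layouts the generated IR uses.

```python
import numpy as np

alpha, tiles, C, F = 4, 6, 3, 5                     # alpha = m + r - 1
rng = np.random.default_rng(0)
V = rng.standard_normal((alpha, alpha, tiles, C))   # transformed input tiles
U = rng.standard_normal((alpha, alpha, C, F))       # transformed filters

# Per-(i, j) elementwise product summed over channels ...
M_pointwise = np.einsum('ijtc,ijcf->ijtf', V, U)
# ... is exactly a matrix multiply batched over the alpha x alpha positions.
M_batched = (V.reshape(alpha * alpha, tiles, C) @
             U.reshape(alpha * alpha, C, F)).reshape(alpha, alpha, tiles, F)
assert np.allclose(M_pointwise, M_batched)
```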


>From a3d188ed7d25df05ccd6bc227ddc361b0c66a2f4 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Mon, 17 Jun 2024 11:24:07 +0100
Subject: [PATCH 1/2] [mlir][linalg] Implement Conv2D using Winograd Conv2D
 algorithm

Define high level winograd operators and convert conv_2d_nhwc_fhwc into
winograd operators. According to Winograd Conv2D algorithm, we need
three transform operators for input, filter, and output transformation.

The formula of Winograd Conv2D algorithm is

Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A

filter transform: G x g x G^T
input transform: B^T x d x B
output transform: A^T x y x A

The implementation is based on the paper "Fast Algorithms for
Convolutional Neural Networks" (https://arxiv.org/abs/1509.09308).
---
 .../mlir/Dialect/Linalg/IR/LinalgOps.td   | 114 +++
 .../Dialect/Linalg/Transforms/Transforms.h|   4 +
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp  |  78 +
 .../Dialect/Linalg/Transforms/CMakeLists.txt  |   1 +
 .../Linalg/Transforms/WinogradConv2D.cpp  | 321 ++
 mlir/test/Dialect/Linalg/winograd-conv2d.mlir | 248 ++
 .../Dialect/Linalg/TestLinalgTransforms.cpp   |  13 +
 7 files changed, 779 insertions(+)
 create mode 100644 mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp
 create mode 100644 mlir/test/Dialect/Linalg/winograd-conv2d.mlir

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 64c538367267d..de1097b6ac27b 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,4 +154,118 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+  let summary = "Winograd filter transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of filter
+transformation (G x g x G^T) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$filter,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $filter `:` type($filter) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+  let summary = "Winograd input transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of input
+transformation (B^T x d x B) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $input `:` type($input) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+  let summary = "Winograd output transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. 

[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-mlir-linalg

@llvm/pr-subscribers-mlir

Author: Hsiangkai Wang (Hsiangkai)


Changes

Add a transform operator structured.winograd_conv2d to convert
linalg.conv_2d_nhwc_fhwc to Linalg winograd operators.


---

Patch is 57.69 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96182.diff


10 Files Affected:

- (modified) mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td (+114) 
- (modified) 
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td (+51) 
- (modified) mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h (+11) 
- (modified) mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp (+78) 
- (modified) mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp (+25) 
- (modified) mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt (+1) 
- (added) mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp (+327) 
- (added) mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir (+88) 
- (added) mlir/test/Dialect/Linalg/winograd-conv2d.mlir (+248) 
- (modified) mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp (+13) 


``diff
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 64c538367267d..de1097b6ac27b 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,4 +154,118 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+  let summary = "Winograd filter transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of filter
+transformation (G x g x G^T) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$filter,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $filter `:` type($filter) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+  let summary = "Winograd input transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of input
+transformation (B^T x d x B) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $input `:` type($input) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+  let summary = "Winograd output transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high lev

[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-linalg

Author: Hsiangkai Wang (Hsiangkai)


Changes

Convert Linalg winograd_filter_transform, winograd_input_transform, and
winograd_output_transform into nested loops with matrix multiplication
with constant transform matrices.

Support several configurations of Winograd Conv2D, including F(2, 3),
F(4, 3) and F(2, 5). These configurations show that the implementation
can support different kernel sizes (3 and 5) and different output sizes
(2 and 4). Besides the symmetric kernel sizes 3x3 and 5x5, this patch also
supports 1x3, 3x1, 1x5, and 5x1 kernels.

The implementation is based on the paper "Fast Algorithms for
Convolutional Neural Networks" (https://arxiv.org/abs/1509.09308).


---

Patch is 99.59 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96183.diff


11 Files Affected:

- (modified) mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td (+114) 
- (modified) 
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td (+51) 
- (modified) mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h (+14) 
- (modified) mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp (+78) 
- (modified) mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp (+25) 
- (modified) mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt (+1) 
- (added) mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp (+1100) 
- (added) mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir (+88) 
- (added) mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir (+105) 
- (added) mlir/test/Dialect/Linalg/winograd-conv2d.mlir (+248) 
- (modified) mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp (+24) 


``diff
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 64c538367267d..de1097b6ac27b 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,4 +154,118 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+  let summary = "Winograd filter transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of filter
+transformation (G x g x G^T) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$filter,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $filter `:` type($filter) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+  let summary = "Winograd input transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+This operator is defined to represent the high level concept of input
+transformation (B^T x d x B) in the Winograd Conv2D algorithm.
+  }];
+
+  let arguments = (ins AnyRankedTensor:$input,
+   AnyRankedTensor:$output,
+   I64Attr:$m,
+   I64Attr:$r
+  );
+
+  let results = (outs AnyRankedTensor:$result);
+  let assemblyFormat = [{
+attr-dict
+`m` `(` $m `)`
+`r` `(` $r `)`
+`ins` `(` $input `:` type($input) `)`
+`outs` `(` $output `:` type($output) `)`
+`->` type($result)
+  }];
+  let hasVerifier = 1;
+}
+
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+  let summary = "Winograd output transform operator";
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2

[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96177)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

Hsiangkai wrote:

Sorry, I am still figuring out how to create stack PRs.

https://github.com/llvm/llvm-project/pull/96177
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96178)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

Hsiangkai wrote:

Sorry, I am still figuring out how to create stack PRs.

https://github.com/llvm/llvm-project/pull/96178
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96178)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai closed 
https://github.com/llvm/llvm-project/pull/96178
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96177)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai closed 
https://github.com/llvm/llvm-project/pull/96177
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96179)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai closed 
https://github.com/llvm/llvm-project/pull/96179
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96179)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

Hsiangkai wrote:

Sorry, I am still figuring out how to create stack PRs.

https://github.com/llvm/llvm-project/pull/96179
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai created 
https://github.com/llvm/llvm-project/pull/96184

In order to support arbitrary size input data of conv2d, implement 
TilingInterface for winograd operators. Before converting winograd operators 
into nested loops with matrix multiply, tile the input of conv2d into the 
supported size first.

Add a transform operator structured.decompose_winograd_op to decompose winograd 
operators. Before applying the transform op, use tile_using_for to tile the 
input data into supported size. The test case shows how to tile and decompose 
winograd operators.

>From 7300578082fb321a0617ed2b61202eca39989e59 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Mon, 17 Jun 2024 11:44:27 +0100
Subject: [PATCH] [mlir][linalg] Implement TilingInterface for winograd
 operators

In order to support arbitrary size input data of conv2d, implement
TilingInterface for winograd operators. Before converting winograd
operators into nested loops with matrix multiply, tile the input of
conv2d into the supported size first.

Add a transform operator structured.decompose_winograd_op to decompose
winograd operators. Before applying the transform op, use tile_using_for
to tile the input data into supported size. The test case shows how to
tile and decompose winograd operators.
---
 .../mlir/Dialect/Linalg/IR/LinalgOps.td   |  21 +-
 .../Linalg/TransformOps/LinalgTransformOps.td |  37 ++
 .../Dialect/Linalg/Transforms/Transforms.h|  45 +++
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp  | 281 +++
 .../TransformOps/LinalgTransformOps.cpp   |  27 ++
 .../Linalg/Transforms/WinogradConv2D.cpp  |  18 +
 .../transform-tile-and-winograd-rewrite.mlir  | 332 ++
 7 files changed, 758 insertions(+), 3 deletions(-)
 create mode 100644 
mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index de1097b6ac27b..45726d6ee2224 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,7 +154,12 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd filter transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
@@ -192,7 +197,12 @@ def Linalg_WinogradFilterTransformOp : 
Linalg_Op<"winograd_filter_transform"> {
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd input transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
@@ -230,7 +240,12 @@ def Linalg_WinogradInputTransformOp : 
Linalg_Op<"winograd_input_transform"> {
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd output transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
diff --git 
a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td 
b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 68d0f713caad4..71736eae38b4f 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -2638,4 +2638,41 @@ def WinogradConv2DOp : Op {
+  let description = [{
+Decompose winograd operators. It will convert filter, input and output
+transform operators into a combination of scf, tensor, and linalg
+equivalent operators. Before applying this transform operator, users
+need to tile winograd transform operators into supported sizes.
+
+ Return modes:
+
+This operation fails if `target` is unsupported. Otherwise, the operation
+succeeds and returns a handle of the sequence that replaces the original
+operator.
+  }];
+
+  let arguments = (ins TransformHandleTypeInterface:$target);
+  let results = (outs TransformHandleTypeInterface:$transformed);
+
+  let assemblyFormat =
+"$target attr-dict `:` functional-type($target, results)";
+
+  let builders = [
+OpBuilder<(ins "Value":$target)>
+  ];
+
+  let extraClassDeclaration = [{
+::mlir::DiagnosedSilenceableFailure applyToOne(
+::mlir::transform::TransformRewriter &rewriter,
+::mlir::Operation *target,
+::mlir::transform::ApplyToEachResultList &results,
+

[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir-linalg

Author: Hsiangkai Wang (Hsiangkai)


Changes

In order to support arbitrary size input data of conv2d, implement 
TilingInterface for winograd operators. Before converting winograd operators 
into nested loops with matrix multiply, tile the input of conv2d into the 
supported size first.

Add a transform operator structured.decompose_winograd_op to decompose winograd 
operators. Before applying the transform op, use tile_using_for to tile the 
input data into supported size. The test case shows how to tile and decompose 
winograd operators.

---

Patch is 58.85 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96184.diff


7 Files Affected:

- (modified) mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td (+18-3) 
- (modified) 
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td (+37) 
- (modified) mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h (+45) 
- (modified) mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp (+281) 
- (modified) mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp (+27) 
- (modified) mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp (+18) 
- (added) mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir 
(+332) 


``diff
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index de1097b6ac27b..45726d6ee2224 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,7 +154,12 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd filter transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
@@ -192,7 +197,12 @@ def Linalg_WinogradFilterTransformOp : 
Linalg_Op<"winograd_filter_transform"> {
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd input transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
@@ -230,7 +240,12 @@ def Linalg_WinogradInputTransformOp : 
Linalg_Op<"winograd_input_transform"> {
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd output transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
diff --git 
a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td 
b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 68d0f713caad4..71736eae38b4f 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -2638,4 +2638,41 @@ def WinogradConv2DOp : Op {
+  let description = [{
+Decompose winograd operators. It will convert filter, input and output
+transform operators into a combination of scf, tensor, and linalg
+equivalent operators. Before applying this transform operator, users
+need to tile winograd transform operators into supported sizes.
+
+ Return modes:
+
+This operation fails if `target` is unsupported. Otherwise, the operation
+succeeds and returns a handle of the sequence that replaces the original
+operator.
+  }];
+
+  let arguments = (ins TransformHandleTypeInterface:$target);
+  let results = (outs TransformHandleTypeInterface:$transformed);
+
+  let assemblyFormat =
+"$target attr-dict `:` functional-type($target, results)";
+
+  let builders = [
+OpBuilder<(ins "Value":$target)>
+  ];
+
+  let extraClassDeclaration = [{
+::mlir::DiagnosedSilenceableFailure applyToOne(
+::mlir::transform::TransformRewriter &rewriter,
+::mlir::Operation *target,
+::mlir::transform::ApplyToEachResultList &results,
+::mlir::transform::TransformState &state);
+  }];
+}
+
 #endif // LINALG_TRANSFORM_OPS
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h 
b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index bb7ec590faad0..d0eec2be1f8fb 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1319,6 +1319,51 @@ FailureOr winogradConv2D(RewriterBase 
&rewriter,
   linalg::Conv2DNhwcFhwcOp op, int64_t m,
   

[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Hsiangkai Wang (Hsiangkai)


Changes

In order to support arbitrary-size input data for conv2d, implement 
TilingInterface for the winograd operators. Before converting winograd operators 
into nested loops with matrix multiply, tile the input of conv2d into a 
supported size first.

Add a transform operator structured.decompose_winograd_op to decompose winograd 
operators. Before applying the transform op, use tile_using_for to tile the 
input data into a supported size. The test case shows how to tile and decompose 
winograd operators.

---

Patch is 58.85 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96184.diff


7 Files Affected:

- (modified) mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td (+18-3) 
- (modified) 
mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td (+37) 
- (modified) mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h (+45) 
- (modified) mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp (+281) 
- (modified) mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp (+27) 
- (modified) mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp (+18) 
- (added) mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir 
(+332) 


``diff
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index de1097b6ac27b..45726d6ee2224 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,7 +154,12 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd filter transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
@@ -192,7 +197,12 @@ def Linalg_WinogradFilterTransformOp : 
Linalg_Op<"winograd_filter_transform"> {
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd input transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
@@ -230,7 +240,12 @@ def Linalg_WinogradInputTransformOp : 
Linalg_Op<"winograd_input_transform"> {
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd output transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
diff --git 
a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td 
b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 68d0f713caad4..71736eae38b4f 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -2638,4 +2638,41 @@ def WinogradConv2DOp : Op {
+  let description = [{
+Decompose winograd operators. It will convert filter, input and output
+transform operators into a combination of scf, tensor, and linalg
+equivalent operators. Before applying this transform operator, users
+need to tile winograd transform operators into supported sizes.
+
+ Return modes:
+
+This operation fails if `target` is unsupported. Otherwise, the operation
+succeeds and returns a handle of the sequence that replaces the original
+operator.
+  }];
+
+  let arguments = (ins TransformHandleTypeInterface:$target);
+  let results = (outs TransformHandleTypeInterface:$transformed);
+
+  let assemblyFormat =
+"$target attr-dict `:` functional-type($target, results)";
+
+  let builders = [
+OpBuilder<(ins "Value":$target)>
+  ];
+
+  let extraClassDeclaration = [{
+::mlir::DiagnosedSilenceableFailure applyToOne(
+::mlir::transform::TransformRewriter &rewriter,
+::mlir::Operation *target,
+::mlir::transform::ApplyToEachResultList &results,
+::mlir::transform::TransformState &state);
+  }];
+}
+
 #endif // LINALG_TRANSFORM_OPS
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h 
b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index bb7ec590faad0..d0eec2be1f8fb 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1319,6 +1319,51 @@ FailureOr winogradConv2D(RewriterBase 
&rewriter,
   linalg::Conv2DNhwcFhwcOp op, int64_t m,
  

[llvm-branch-commits] [mlir] [mlir][linalg] Add transform operator for Winograd Conv2D algorithm (PR #96182)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai updated 
https://github.com/llvm/llvm-project/pull/96182

>From 374b0d5b83ce080bea690199380e270a36ad1c52 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Mon, 17 Jun 2024 11:49:08 +0100
Subject: [PATCH] [mlir][linalg] Add transform operator for Winograd Conv2D
 algorithm

Add a transform operator structured.winograd_conv2d to convert
linalg.conv_2d_nhwc_fhwc to Linalg winograd operators.
---
 .../Linalg/TransformOps/LinalgTransformOps.td | 51 +++
 .../Dialect/Linalg/Transforms/Transforms.h|  7 ++
 .../TransformOps/LinalgTransformOps.cpp   | 25 ++
 .../Linalg/Transforms/WinogradConv2D.cpp  |  6 ++
 .../Linalg/transform-winograd-conv2d.mlir | 88 +++
 5 files changed, 177 insertions(+)
 create mode 100644 mlir/test/Dialect/Linalg/transform-winograd-conv2d.mlir

diff --git 
a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td 
b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 93e2c2db729da..68d0f713caad4 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -2587,4 +2587,55 @@ def MapCopyToThreadsOp :
   }];
 }
 
+//===--===//
+// Winograd Conv2D
+//===--===//
+
+def WinogradConv2DOp : Op {
+  let description = [{
+Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
+matrix multiply. Before the matrix multiply, it will convert filter and
+input into a format suitable for batched matrix multiply. After the matrix
+multiply, it will convert output to the final result tensor.
+
+The algorithm F(m x m, r x r) is
+
+Y = A^T x [(G x g x G^T) @ (B^T x d x B)] x A
+
+The size of output Y is m x m. The size of filter g is r x r. The size of
+input d is (m + r - 1) x (m + r - 1). A^T, A, G^T, G, B^T, and B are
+transformation matrices.
+
+ Return modes:
+
+This operation fails if `target` is unsupported. Otherwise, the operation
+succeeds and returns a handle of the sequence that replaces the original
+convolution.
+  }];
+
+  let arguments = (ins TransformHandleTypeInterface:$target,
+   I64Attr:$m,
+   I64Attr:$r);
+  let results = (outs TransformHandleTypeInterface:$transformed);
+
+  let assemblyFormat =
+"$target attr-dict `:` functional-type($target, results)";
+
+  let builders = [
+OpBuilder<(ins "Value":$target)>
+  ];
+
+  let extraClassDeclaration = [{
+::mlir::DiagnosedSilenceableFailure applyToOne(
+::mlir::transform::TransformRewriter &rewriter,
+::mlir::linalg::LinalgOp target,
+::mlir::transform::ApplyToEachResultList &results,
+::mlir::transform::TransformState &state);
+  }];
+}
+
 #endif // LINALG_TRANSFORM_OPS
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h 
b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 835aeaf2ffed3..da107b66257a5 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1312,6 +1312,13 @@ FailureOr transposeBatchMatmul(RewriterBase 
&rewriter,
 linalg::BatchMatmulOp op,
 bool transposeLHS = true);
 
+/// Convert linalg.conv_2d_nhwc_fhwc to Winograd Conv2D algorithm
+/// F(m x m, r x r). m is the dimension size of output and r is the dimension
+/// size of filter.
+FailureOr winogradConv2D(RewriterBase &rewriter,
+  linalg::Conv2DNhwcFhwcOp op, int64_t m,
+  int64_t r);
+
 
//===--===//
 // Rewrite patterns wrapping transformations.
 // TODO: every single such pattern should be a close to noop wrapper around a
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp 
b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index bc02788f9c441..d051b29e1f06f 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -3480,6 +3480,31 @@ DiagnosedSilenceableFailure 
transform::MapCopyToThreadsOp::applyToOne(
   return DiagnosedSilenceableFailure::success();
 }
 
+//===--===//
+// WinogradConv2DOp
+//===--===//
+
+DiagnosedSilenceableFailure transform::WinogradConv2DOp::applyToOne(
+transform::TransformRewriter &rewriter, linalg::LinalgOp target,
+transform::ApplyToEachResultList &results,
+transform::TransformState &state) {
+  rewriter.setInsertionPoint(target);
+  auto maybeTransfo

[llvm-branch-commits] [mlir] [mlir][linalg] Decompose winograd operators (PR #96183)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai updated 
https://github.com/llvm/llvm-project/pull/96183

>From 24c4f957ae673c2955fc0674f91e488813d59350 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Mon, 17 Jun 2024 17:39:49 +0100
Subject: [PATCH] [mlir][linalg] Decompose winograd operators

Convert Linalg winograd_filter_transform, winograd_input_transform, and
winograd_output_transform into nested loops with matrix multiplication
with constant transform matrices.

Support several configurations of Winograd Conv2D, including F(2, 3),
F(4, 3) and F(2, 5). These configurations show that the implementation
can support different kernel sizes (3 and 5) and different output sizes
(2 and 4). Besides symmetric kernel sizes 3x3 and 5x5, this patch also
supports 1x3, 3x1, 1x5, and 5x1 kernels.

The implementation is based on the paper Fast Algorithms for
Convolutional Neural Networks (https://arxiv.org/abs/1509.09308).
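
For orientation (a summary added here, not text from the patch): in the minimal filtering form F(m x m, r x r), the transform matrices have the following shapes, where the product between the transformed filter and transformed input tiles is element-wise:

```latex
% Shapes in F(m \times m, r \times r); \odot is element-wise multiplication.
\[
  Y \;=\; A^{T}\,\bigl[\,(G\,g\,G^{T}) \odot (B^{T}\,d\,B)\,\bigr]\,A
\]
\[
  g \in \mathbb{R}^{r \times r},\qquad
  d \in \mathbb{R}^{(m+r-1)\times(m+r-1)},\qquad
  Y \in \mathbb{R}^{m \times m},
\]
\[
  G \in \mathbb{R}^{(m+r-1)\times r},\qquad
  B^{T},\,B \in \mathbb{R}^{(m+r-1)\times(m+r-1)},\qquad
  A^{T} \in \mathbb{R}^{m \times (m+r-1)}.
\]
% Input tile size is (m+r-1) x (m+r-1): 4x4 for F(2, 3), 6x6 for F(4, 3),
% and 6x6 for F(2, 5), matching the constant matrices defined in this patch.
```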
---
 .../Dialect/Linalg/Transforms/Transforms.h|   3 +
 .../Linalg/Transforms/WinogradConv2D.cpp  | 773 ++
 .../Linalg/winograd-conv2d-rewrite.mlir   | 105 +++
 .../Dialect/Linalg/TestLinalgTransforms.cpp   |  11 +
 4 files changed, 892 insertions(+)
 create mode 100644 mlir/test/Dialect/Linalg/winograd-conv2d-rewrite.mlir

diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h 
b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index da107b66257a5..bb7ec590faad0 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1703,6 +1703,9 @@ void populateBlockPackMatmulPatterns(RewritePatternSet 
&patterns,
 void populateWinogradConv2DPatterns(RewritePatternSet &patterns, int64_t m,
 int64_t r);
 
+/// Patterns to decompose Winograd operators.
+void populateDecomposeWinogradOpsPatterns(RewritePatternSet &patterns);
+
 } // namespace linalg
 } // namespace mlir
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp 
b/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp
index d1f4be8bbf29a..d245723c85646 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/WinogradConv2D.cpp
@@ -12,7 +12,10 @@
 //
 
//===--===//
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tosa/Utils/ConversionUtils.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -23,6 +26,156 @@ namespace linalg {
 
 namespace {
 
+// clang-format off
+// Winograd Conv2D uses a minimal 2D filtering algorithm to calculate its
+// result. The formula of minimal 2D filtering algorithm F(m x m, r x r),
+// m is the output dimension and r is the filter dimension, is
+//
+// Y = A^T x [ (G x g x G^T) x (B^T x d x B) ] x A
+//
+// g is filter and d is input data. We need to prepare 6 constant
+// transformation matrices, G, G^T, B^T, B, A^T, and A for this formula.
+//
+// The following tables define these constant transformation matrices for
+// F(2 x 2, 3 x 3), F(4 x 4, 3 x 3), and F(2 x 2, 5 x 5)
+constexpr float G_2x2_3x3[] = {
+   -1, 0,   0,
+ 1./2, -1./2, 1./2,
+ 1./2,  1./2, 1./2,
+0, 0,1
+};
+
+constexpr float GT_2x2_3x3[] = {
+   -1,  1./2, 1./2, 0,
+0, -1./2, 1./2, 0,
+0,  1./2, 1./2, 1
+};
+
+constexpr float BT_2x2_3x3[] = {
+   -1,0,   1,   0,
+0,   -1,   1,   0,
+0,1,   1,   0,
+0,   -1,   0,   1
+};
+
+constexpr float B_2x2_3x3[] = {
+   -1,0,   0,   0,
+0,   -1,   1,  -1,
+1,1,   1,   0,
+0,0,   0,   1
+};
+
+constexpr float AT_2x2_3x3[] = {
+1,1,   1,   0,
+0,   -1,   1,   1
+};
+
+constexpr float A_2x2_3x3[] = {
+1,0,
+1,   -1,
+1,1,
+0,1
+};
+
+constexpr float G_4x4_3x3[] = {
+ 1, 0, 0,
+ -1./3,  1./3, -1./3,
+ -1./3, -1./3, -1./3,
+ 1./12, -1./6,  1./3,
+ 1./12,  1./6,  1./3,
+ 0, 0, 1
+};
+
+constexpr float GT_4x4_3x3[] = {
+ 1,  -1./3, -1./3, 1./12, 1./12, 0,
+ 0,   1./3, -1./3, -1./6,  1./6, 0,
+ 0,  -1./3, -1./3,  1./3,  1./3, 1
+};
+
+constexpr float BT_4x4_3x3[] = {
+ 1./4, 0, -5./16,  0, 1./16, 0,
+0,  1./4,  -1./4, -1./16, 1./16, 0,
+0, -1./4,  -1./4,  1./16, 1./16, 0,
+0,  1./4,  -1./8,  -1./4,  1./8, 0,
+0, -1./4,  -1./8,   1./4,  1./8, 0,
+0,  1./4,  0, -5./16, 0, 1./16
+};
+
+constexpr float B_4x4_3x3[] = {
+   1./4,  0, 0, 0, 0,  0,
+  0,   1./4, -1./4,  1./4, -1./4,   1./4,
+ -5./16,  -1./4, -1./4, -1./8, -1./8,  0,
+  0, -1./16, 1./16, -1./4,  1./4, -5./16,
+  1./16,  1./16, 1./16,  1./8,  1./8,  0,
+  0,  0, 0, 0, 0,  1./16
+};
+
+constexpr float AT_4x4_3x3[] = {
+ 1./8,  1./4, 

[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai updated 
https://github.com/llvm/llvm-project/pull/96184

>From 73b524b7746839614655fd8082dbda297e93ba72 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Mon, 17 Jun 2024 11:44:27 +0100
Subject: [PATCH] [mlir][linalg] Implement TilingInterface for winograd
 operators

In order to support arbitrary-size input data for conv2d, implement
TilingInterface for the winograd operators. Before converting winograd
operators into nested loops with matrix multiply, tile the input of
conv2d into a supported size first.

Add a transform operator structured.decompose_winograd_op to decompose
winograd operators. Before applying the transform op, use tile_using_for
to tile the input data into a supported size. The test case shows how to
tile and decompose winograd operators.
---
 .../mlir/Dialect/Linalg/IR/LinalgOps.td   |  21 +-
 .../Linalg/TransformOps/LinalgTransformOps.td |  37 ++
 .../Dialect/Linalg/Transforms/Transforms.h|  45 +++
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp  | 281 +++
 .../TransformOps/LinalgTransformOps.cpp   |  27 ++
 .../Linalg/Transforms/WinogradConv2D.cpp  |  18 +
 .../transform-tile-and-winograd-rewrite.mlir  | 332 ++
 7 files changed, 758 insertions(+), 3 deletions(-)
 create mode 100644 
mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index de1097b6ac27b..45726d6ee2224 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,7 +154,12 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd filter transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
@@ -192,7 +197,12 @@ def Linalg_WinogradFilterTransformOp : 
Linalg_Op<"winograd_filter_transform"> {
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd input transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
@@ -230,7 +240,12 @@ def Linalg_WinogradInputTransformOp : 
Linalg_Op<"winograd_input_transform"> {
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd output transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
diff --git 
a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td 
b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 68d0f713caad4..71736eae38b4f 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -2638,4 +2638,41 @@ def WinogradConv2DOp : Op {
+  let description = [{
+Decompose winograd operators. It will convert filter, input and output
+transform operators into a combination of scf, tensor, and linalg
+equivalent operators. Before applying this transform operator, users
+need to tile winograd transform operators into supported sizes.
+
+ Return modes:
+
+This operation fails if `target` is unsupported. Otherwise, the operation
+succeeds and returns a handle of the sequence that replaces the original
+operator.
+  }];
+
+  let arguments = (ins TransformHandleTypeInterface:$target);
+  let results = (outs TransformHandleTypeInterface:$transformed);
+
+  let assemblyFormat =
+"$target attr-dict `:` functional-type($target, results)";
+
+  let builders = [
+OpBuilder<(ins "Value":$target)>
+  ];
+
+  let extraClassDeclaration = [{
+::mlir::DiagnosedSilenceableFailure applyToOne(
+::mlir::transform::TransformRewriter &rewriter,
+::mlir::Operation *target,
+::mlir::transform::ApplyToEachResultList &results,
+::mlir::transform::TransformState &state);
+  }];
+}
+
 #endif // LINALG_TRANSFORM_OPS
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h 
b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index bb7ec590faad0..d0eec2be1f8fb 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1319,6 +1319,51 @@ FailureOr winogradConv2D(RewriterBase 
&rewriter,
   linal

[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread Christudasan Devadasan via llvm-branch-commits

cdevadas wrote:

> This looks like it is affecting codegen even when xnack is disabled? That 
> should not happen.

It shouldn't. I put the xnack replay subtarget check before using *_ec 
equivalents. See the code here: 
https://github.com/llvm/llvm-project/commit/65eb44327cf32a83dbbf13eb70f9d8c03f3efaef#diff-35f4d1b6c4c17815f6989f86abbac2e606ca760f9d93f501ff503449048bf760R1735

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

I'm still not sure why we have so much in this pass. The load and store 
vectorization should have happened in the IR. This pass originally was for the 
multi offset DS instructions 

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread Matt Arsenault via llvm-branch-commits


@@ -1701,17 +1732,33 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const 
CombineInfo &CI,
   return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
 }
   case S_LOAD_IMM:
-switch (Width) {
-default:
-  return 0;
-case 2:
-  return AMDGPU::S_LOAD_DWORDX2_IMM;
-case 3:
-  return AMDGPU::S_LOAD_DWORDX3_IMM;
-case 4:
-  return AMDGPU::S_LOAD_DWORDX4_IMM;
-case 8:
-  return AMDGPU::S_LOAD_DWORDX8_IMM;
+// For targets that support XNACK replay, use the constrained load opcode.
+if (STI && STI->hasXnackReplay()) {
+  switch (Width) {

arsenm wrote:

One switch and move the condition inside each size case?
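
Presumably something with the following shape (a sketch of the suggestion only; the `_ec` opcode names and the helper are assumptions, not code from the patch):

```cpp
// Illustrative sketch of "one switch, condition inside each case".
static unsigned getSLoadImmOpcode(const GCNSubtarget &STI, unsigned Width) {
  // Whether to pick the constrained (_ec) variant; see the isXNACKEnabled()
  // note later in this thread.
  const bool UseConstrained = STI.isXNACKEnabled();
  switch (Width) {
  default:
    return 0;
  case 2:
    return UseConstrained ? AMDGPU::S_LOAD_DWORDX2_IMM_ec
                          : AMDGPU::S_LOAD_DWORDX2_IMM;
  case 3:
    return UseConstrained ? AMDGPU::S_LOAD_DWORDX3_IMM_ec
                          : AMDGPU::S_LOAD_DWORDX3_IMM;
  case 4:
    return UseConstrained ? AMDGPU::S_LOAD_DWORDX4_IMM_ec
                          : AMDGPU::S_LOAD_DWORDX4_IMM;
  case 8:
    return UseConstrained ? AMDGPU::S_LOAD_DWORDX8_IMM_ec
                          : AMDGPU::S_LOAD_DWORDX8_IMM;
  }
}
```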

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread Matt Arsenault via llvm-branch-commits


@@ -1701,17 +1732,33 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const 
CombineInfo &CI,
   return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
 }
   case S_LOAD_IMM:
-switch (Width) {
-default:
-  return 0;
-case 2:
-  return AMDGPU::S_LOAD_DWORDX2_IMM;
-case 3:
-  return AMDGPU::S_LOAD_DWORDX3_IMM;
-case 4:
-  return AMDGPU::S_LOAD_DWORDX4_IMM;
-case 8:
-  return AMDGPU::S_LOAD_DWORDX8_IMM;
+// For targets that support XNACK replay, use the constrained load opcode.
+if (STI && STI->hasXnackReplay()) {

arsenm wrote:

STI should never be null. The conservative default would be to assume ec if it 
were possible 

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread Jay Foad via llvm-branch-commits

jayfoad wrote:

> > This looks like it is affecting codegen even when xnack is disabled? That 
> > should not happen.
> 
> It shouldn't. I put the xnack replay subtarget check before using *_ec 
> equivalents. See the code here: 
> [65eb443#diff-35f4d1b6c4c17815f6989f86abbac2e606ca760f9d93f501ff503449048bf760R1735](https://github.com/llvm/llvm-project/commit/65eb44327cf32a83dbbf13eb70f9d8c03f3efaef#diff-35f4d1b6c4c17815f6989f86abbac2e606ca760f9d93f501ff503449048bf760R1735)

You're checking `STI->hasXnackReplay()` which is true on all GFX8+ targets. You 
should be checking whether xnack support is enabled with 
`STI->isXNACKEnabled()`.
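
Put as a snippet (a paraphrase of the point, not code from the patch):

```cpp
// Capability vs. configuration:
bool CanReplay = STI->hasXnackReplay();  // hardware can replay; true on all GFX8+
bool XnackOn   = STI->isXNACKEnabled();  // XNACK actually enabled for this compilation
// The constrained (_ec) load opcodes should be gated on XnackOn, not CanReplay.
```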

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread Jay Foad via llvm-branch-commits


@@ -967,6 +967,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasLDSFPAtomicAddF32() const { return GFX8Insts; }
   bool hasLDSFPAtomicAddF64() const { return GFX90AInsts; }
+  bool hasXnackReplay() const { return GFX8Insts; }

jayfoad wrote:

We already have a field SupportsXNACK for this which is hooked up to the 
"xnack-support" target feature.

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect conversion: Simplify handling of dropped arguments (PR #96207)

2024-06-20 Thread Matthias Springer via llvm-branch-commits

https://github.com/matthias-springer created 
https://github.com/llvm/llvm-project/pull/96207

This commit simplifies the handling of dropped arguments and updates some 
dialect conversion documentation that is outdated.

When converting a block signature, a `BlockTypeConversionRewrite` object and 
potentially multiple `ReplaceBlockArgRewrite` objects are created. During the "commit" 
phase, uses of the old block arguments are replaced with the new block 
arguments, but the old implementation was written in an inconsistent way: some 
block arguments were replaced in `BlockTypeConversionRewrite::commit` and some 
were replaced in `ReplaceBlockArgRewrite::commit`. The new 
`BlockTypeConversionRewrite::commit` implementation is much simpler and no 
longer modifies any IR; that is done only in `ReplaceBlockArgRewrite` now. The 
`ConvertedArgInfo` data structure is no longer needed.

To that end, materializations of dropped arguments are now built in 
`applySignatureConversion` instead of `materializeLiveConversions`; the latter 
function no longer has to deal with dropped arguments.

Other minor improvements:
- Improve variable name: `origOutputType` -> `origArgType`. Add an assertion to 
check that this field is only used for argument materializations.
- Add more comments to `applySignatureConversion`.

Note: Error messages around failed materializations for dropped basic block 
arguments changed slightly. That is because those materializations are now 
built in `legalizeUnresolvedMaterialization` instead of 
`legalizeConvertedArgumentTypes`.

This commit is in preparation of decoupling argument/source/target 
materializations from the dialect conversion.


>From ed7bb706dc8eaec2f45a8f63c98d9b9b7fac23a8 Mon Sep 17 00:00:00 2001
From: Matthias Springer 
Date: Thu, 20 Jun 2024 11:18:44 +0200
Subject: [PATCH] [mlir][Transforms] Dialect conversion: Simplify handling of
 dropped arguments

This commit simplifies the handling of dropped arguments and updates some 
dialect conversion documentation that is outdated.

When converting a block signature, a `BlockTypeConversionRewrite` object and 
potentially multiple `ReplaceBlockArgRewrite` objects are created. During the "commit" 
phase, uses of the old block arguments are replaced with the new block 
arguments, but the old implementation was written in an inconsistent way: some 
block arguments were replaced in `BlockTypeConversionRewrite::commit` and some 
were replaced in `ReplaceBlockArgRewrite::commit`. The new 
`BlockTypeConversionRewrite::commit` implementation is much simpler and no 
longer modifies any IR; that is done only in `ReplaceBlockArgRewrite` now. The 
`ConvertedArgInfo` data structure is no longer needed.

To that end, materializations of dropped arguments are now built in 
`applySignatureConversion` instead of `materializeLiveConversions`; the latter 
function no longer has to deal with dropped arguments.

Other minor improvements:
- Improve variable name: `origOutputType` -> `origArgType`. Add an assertion to 
check that this field is only used for argument materializations.
- Add more comments to `applySignatureConversion`.

Note: Error messages around failed materializations for dropped basic block 
arguments changed slightly. That is because those materializations are now 
built in `legalizeUnresolvedMaterialization` instead of 
`legalizeConvertedArgumentTypes`.
---
 mlir/docs/DialectConversion.md|  37 +++-
 .../mlir/Transforms/DialectConversion.h   |  10 +-
 .../Transforms/Utils/DialectConversion.cpp| 208 +++---
 .../test-legalize-type-conversion.mlir|   6 +-
 4 files changed, 111 insertions(+), 150 deletions(-)

diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md
index 69781bb868bbf..f722974a9a1e5 100644
--- a/mlir/docs/DialectConversion.md
+++ b/mlir/docs/DialectConversion.md
@@ -246,6 +246,13 @@ depending on the situation.
 
 -   An argument materialization is used when converting the type of a block
 argument during a [signature conversion](#region-signature-conversion).
+The new block argument types are specified in a `SignatureConversion`
+object. An original block argument can be converted into multiple
+block arguments, which is not supported everywhere in the dialect
+conversion. (E.g., adaptors support only a single replacement value for
+each original value.) Therefore, an argument materialization is used to
+convert potentially multiple new block arguments back into a single SSA
+value.
 
 *   Source Materialization
 
@@ -259,6 +266,9 @@ depending on the situation.
 *   When a block argument has been converted to a different type, but
 the original argument still has users that will remain live after
 the conversion process has finished.
+*   When a block argument has been dropped, but the argument still has
+users that will remain live after the conversion pro

[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 1/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count(&BF))
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 2/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 3/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count(&BF))
+if (ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 4/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 5/7] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

dif

[llvm-branch-commits] [mlir] [mlir][Transforms] Dialect conversion: Simplify handling of dropped arguments (PR #96207)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-core

Author: Matthias Springer (matthias-springer)


Changes

This commit simplifies the handling of dropped arguments and updates some 
dialect conversion documentation that is outdated.

When converting a block signature, a `BlockTypeConversionRewrite` object and 
potentially multiple `ReplaceBlockArgRewrite` objects are created. During the "commit" 
phase, uses of the old block arguments are replaced with the new block 
arguments, but the old implementation was written in an inconsistent way: some 
block arguments were replaced in `BlockTypeConversionRewrite::commit` and some 
were replaced in `ReplaceBlockArgRewrite::commit`. The new 
`BlockTypeConversionRewrite::commit` implementation is much simpler and no 
longer modifies any IR; that is done only in `ReplaceBlockArgRewrite` now. The 
`ConvertedArgInfo` data structure is no longer needed.

To that end, materializations of dropped arguments are now built in 
`applySignatureConversion` instead of `materializeLiveConversions`; the latter 
function no longer has to deal with dropped arguments.

Other minor improvements:
- Improve variable name: `origOutputType` -> `origArgType`. Add an assertion 
to check that this field is only used for argument materializations.
- Add more comments to `applySignatureConversion`.

Note: Error messages around failed materializations for dropped basic block 
arguments changed slightly. That is because those materializations are now 
built in `legalizeUnresolvedMaterialization` instead of 
`legalizeConvertedArgumentTypes`.

This commit is in preparation of decoupling argument/source/target 
materializations from the dialect conversion.


---

Patch is 24.50 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96207.diff


4 Files Affected:

- (modified) mlir/docs/DialectConversion.md (+25-12) 
- (modified) mlir/include/mlir/Transforms/DialectConversion.h (+6-4) 
- (modified) mlir/lib/Transforms/Utils/DialectConversion.cpp (+78-130) 
- (modified) mlir/test/Transforms/test-legalize-type-conversion.mlir (+2-4) 


``diff
diff --git a/mlir/docs/DialectConversion.md b/mlir/docs/DialectConversion.md
index 69781bb868bbf..f722974a9a1e5 100644
--- a/mlir/docs/DialectConversion.md
+++ b/mlir/docs/DialectConversion.md
@@ -246,6 +246,13 @@ depending on the situation.
 
 -   An argument materialization is used when converting the type of a block
 argument during a [signature conversion](#region-signature-conversion).
+The new block argument types are specified in a `SignatureConversion`
+object. An original block argument can be converted into multiple
+block arguments, which is not supported everywhere in the dialect
+conversion. (E.g., adaptors support only a single replacement value for
+each original value.) Therefore, an argument materialization is used to
+convert potentially multiple new block arguments back into a single SSA
+value.
 
 *   Source Materialization
 
@@ -259,6 +266,9 @@ depending on the situation.
 *   When a block argument has been converted to a different type, but
 the original argument still has users that will remain live after
 the conversion process has finished.
+*   When a block argument has been dropped, but the argument still has
+users that will remain live after the conversion process has
+finished.
 *   When the result type of an operation has been converted to a
 different type, but the original result still has users that will
 remain live after the conversion process is finished.
@@ -330,17 +340,19 @@ class TypeConverter {
 
   /// Register a materialization function, which must be convertible to the
   /// following form:
-  ///   `Optional (OpBuilder &, T, ValueRange, Location)`,
-  ///   where `T` is any subclass of `Type`.
-  /// This function is responsible for creating an operation, using the
-  /// OpBuilder and Location provided, that "converts" a range of values into a
-  /// single value of the given type `T`. It must return a Value of the
-  /// converted type on success, an `std::nullopt` if it failed but other
-  /// materialization can be attempted, and `nullptr` on unrecoverable failure.
-  /// It will only be called for (sub)types of `T`.
+  ///   `std::optional(OpBuilder &, T, ValueRange, Location)`,
+  /// where `T` is any subclass of `Type`. This function is responsible for
+  /// creating an operation, using the OpBuilder and Location provided, that
+  /// "casts" a range of values into a single value of the given type `T`. It
+  /// must return a Value of the converted type on success, an `std::nullopt` 
if
+  /// it failed but other materialization can be attempted, and `nullptr` on
+  /// unrecoverable failure. It will only be called for (sub)types of `T`.
+  /// Materialization functions must be provide

[llvm-branch-commits] [mlir] [mlir][linalg] Implement TilingInterface for winograd operators (PR #96184)

2024-06-20 Thread Hsiangkai Wang via llvm-branch-commits

https://github.com/Hsiangkai updated 
https://github.com/llvm/llvm-project/pull/96184

>From 73b524b7746839614655fd8082dbda297e93ba72 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang 
Date: Mon, 17 Jun 2024 11:44:27 +0100
Subject: [PATCH 1/2] [mlir][linalg] Implement TilingInterface for winograd
 operators

In order to support arbitrary-size input data for conv2d, implement
TilingInterface for the winograd operators. Before converting winograd
operators into nested loops with matrix multiply, tile the input of
conv2d into a supported size first.

Add a transform operator structured.decompose_winograd_op to decompose
winograd operators. Before applying the transform op, use tile_using_for
to tile the input data into a supported size. The test case shows how to
tile and decompose winograd operators.
---
 .../mlir/Dialect/Linalg/IR/LinalgOps.td   |  21 +-
 .../Linalg/TransformOps/LinalgTransformOps.td |  37 ++
 .../Dialect/Linalg/Transforms/Transforms.h|  45 +++
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp  | 281 +++
 .../TransformOps/LinalgTransformOps.cpp   |  27 ++
 .../Linalg/Transforms/WinogradConv2D.cpp  |  18 +
 .../transform-tile-and-winograd-rewrite.mlir  | 332 ++
 7 files changed, 758 insertions(+), 3 deletions(-)
 create mode 100644 
mlir/test/Dialect/Linalg/transform-tile-and-winograd-rewrite.mlir

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td 
b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index de1097b6ac27b..45726d6ee2224 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -154,7 +154,12 @@ def Linalg_SoftmaxOp : Linalg_Op<"softmax",
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform"> {
+def Linalg_WinogradFilterTransformOp : Linalg_Op<"winograd_filter_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd filter transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
@@ -192,7 +197,12 @@ def Linalg_WinogradFilterTransformOp : 
Linalg_Op<"winograd_filter_transform"> {
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform"> {
+def Linalg_WinogradInputTransformOp : Linalg_Op<"winograd_input_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd input transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
@@ -230,7 +240,12 @@ def Linalg_WinogradInputTransformOp : 
Linalg_Op<"winograd_input_transform"> {
   let hasVerifier = 1;
 }
 
-def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform"> {
+def Linalg_WinogradOutputTransformOp : Linalg_Op<"winograd_output_transform",
+[DeclareOpInterfaceMethods]> {
   let summary = "Winograd output transform operator";
   let description = [{
 Winograd Conv2D algorithm will convert linalg Conv2D operator into batched
diff --git 
a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td 
b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 68d0f713caad4..71736eae38b4f 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -2638,4 +2638,41 @@ def WinogradConv2DOp : Op {
+  let description = [{
+Decompose winograd operators. It will convert filter, input and output
+transform operators into a combination of scf, tensor, and linalg
+equivalent operators. Before applying this transform operator, users
+need to tile winograd transform operators into supported sizes.
+
+ Return modes:
+
+This operation fails if `target` is unsupported. Otherwise, the operation
+succeeds and returns a handle of the sequence that replaces the original
+operator.
+  }];
+
+  let arguments = (ins TransformHandleTypeInterface:$target);
+  let results = (outs TransformHandleTypeInterface:$transformed);
+
+  let assemblyFormat =
+"$target attr-dict `:` functional-type($target, results)";
+
+  let builders = [
+OpBuilder<(ins "Value":$target)>
+  ];
+
+  let extraClassDeclaration = [{
+::mlir::DiagnosedSilenceableFailure applyToOne(
+::mlir::transform::TransformRewriter &rewriter,
+::mlir::Operation *target,
+::mlir::transform::ApplyToEachResultList &results,
+::mlir::transform::TransformState &state);
+  }];
+}
+
 #endif // LINALG_TRANSFORM_OPS
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h 
b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index bb7ec590faad0..d0eec2be1f8fb 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1319,6 +1319,51 @@ FailureOr winogradConv2D(RewriterBase 
&rewriter,
   l

[llvm-branch-commits] [libcxx] [libc++] Implement std::move_only_function (P0288R9) (PR #94670)

2024-06-20 Thread Louis Dionne via llvm-branch-commits

https://github.com/ldionne requested changes to this pull request.

I am not certain how to best make a decision about the bit-stealing mechanism 
yet, but I do have a few comments.

https://github.com/llvm/llvm-project/pull/94670
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Implement std::move_only_function (P0288R9) (PR #94670)

2024-06-20 Thread Louis Dionne via llvm-branch-commits

https://github.com/ldionne edited 
https://github.com/llvm/llvm-project/pull/94670
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Implement std::move_only_function (P0288R9) (PR #94670)

2024-06-20 Thread Louis Dionne via llvm-branch-commits


@@ -0,0 +1,93 @@
+//===--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef _LIBCPP___FUNCTIONAL_MOVE_ONLY_FUNCTION_H
+#define _LIBCPP___FUNCTIONAL_MOVE_ONLY_FUNCTION_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 23
+
+// move_only_function design:
+//
+// move_only_function has a small buffer with a size of `3 * sizeof(void*)` 
bytes. This buffer can only be used when the
+// object that should be stored is trivially relocatable (currently only when 
it is trivially move constructible and
+// trivially destructible). There is also a bool in the lower bits of the vptr 
stored which is set when the contained
+// object is not trivially destructible.
+//
+// trivially relocatable: It would also be possible to store 
nothrow_move_constructible types, but that would mean
+// that move_only_function itself would not be trivially relocatable anymore. 
The decision to keep move_only_function
+// trivially relocatable was made because we expect move_only_function to be 
mostly used to store a functor. To only
+// forward functors there is std::function_ref (not voted in yet, expected in 
C++26).
+//
+// buffer size: We did a survey of six implementations from various vendors. 
Three of them had a buffer size of 24 bytes
+// on 64 bit systems. This also allows storing a std::string or std::vector 
inside the small buffer (once the compiler
+// has full support of trivially_relocatable annotations).
+//
+// trivially-destructible bit: This allows us to keep the overall binary size 
smaller because we don't have to store
+// a pointer to a noop function inside the vtable. It also avoids loading the 
vtable during destruction, potentially
+// resulting in fewer cache misses. The downside is that calling the function 
now also requires setting the lower bits
+// of the pointer to zero, but this is a very fast operation on modern CPUs.

ldionne wrote:

I would like us to document the design constraints around avoiding 
double-wrapping when constructing a `std::move_ony_function` from a 
`std::copyable_function` that we discussed just now. Gist of it:

This requires having compatible vtables in both implementations, where 
compatible means that the layout is the same but also that the implementation 
of the vtable functions can be swapped for one another. Basically, this means 
we'll need to use small buffer sizes where `sizeof(move_only_function) >= 
sizeof(copyable_function)`, and have the same criteria for when we put it in 
the small buffer (trivially-relocatable?).

IMO these requirements make it important to reuse the same machinery for 
implementing both classes. I am not certain in what form we want to do that 
yet, but I am thinking that there should be a single place where we encode 
these decisions (the small buffer size, the vtable layout, the condition for 
being in the SBO, etc). I don't think those belong in the `__small_buffer` 
class itself since that can be reused for stuff that isn't `move_only_function` 
or `copyable_function`, but it should be somewhere. Do you have thoughts on 
this?
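
For illustration, a minimal sketch of what such a single shared place could look like (every name below is hypothetical and nothing here is from the PR):

```cpp
// Hypothetical sketch only; none of these names exist in libc++.
#include <cstddef>
#include <type_traits>

struct __callable_storage_traits {
  // Shared small-buffer geometry for move_only_function / copyable_function.
  static constexpr std::size_t __buffer_size  = 3 * sizeof(void*);
  static constexpr std::size_t __buffer_align = alignof(void*);

  // Shared criterion for storing a callable inline, so both wrappers agree on
  // which objects live in the small buffer and their vtables stay compatible.
  template <class _Fn>
  static constexpr bool __uses_small_buffer =
      sizeof(_Fn) <= __buffer_size && alignof(_Fn) <= __buffer_align &&
      std::is_trivially_move_constructible_v<_Fn> &&
      std::is_trivially_destructible_v<_Fn>;
};
```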

https://github.com/llvm/llvm-project/pull/94670
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Implement std::move_only_function (P0288R9) (PR #94670)

2024-06-20 Thread Louis Dionne via llvm-branch-commits


@@ -0,0 +1,93 @@
+//===--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef _LIBCPP___FUNCTIONAL_MOVE_ONLY_FUNCTION_H
+#define _LIBCPP___FUNCTIONAL_MOVE_ONLY_FUNCTION_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 23
+
+// move_only_function design:

ldionne wrote:

We should keep this type experimental until we have an implementation of 
`copyable_function` since those will need to interact in non-trivial ways.

https://github.com/llvm/llvm-project/pull/94670
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] [libc++] Implement std::move_only_function (P0288R9) (PR #94670)

2024-06-20 Thread Louis Dionne via llvm-branch-commits


@@ -0,0 +1,93 @@
+//===--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef _LIBCPP___FUNCTIONAL_MOVE_ONLY_FUNCTION_H
+#define _LIBCPP___FUNCTIONAL_MOVE_ONLY_FUNCTION_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 23
+
+// move_only_function design:
+//
+// move_only_function has a small buffer with a size of `3 * sizeof(void*)` 
bytes. This buffer can only be used when the
+// object that should be stored is trivially relocatable (currently only when 
it is trivially move constructible and
+// trivially destructible). There is also a bool in the lower bits of the vptr 
stored which is set when the contained
+// object is not trivially destructible.
+//
+// trivially relocatable: It would also be possible to store 
nothrow_move_constructible types, but that would mean
+// that move_only_function itself would not be trivially relocatable anymore. 
The decision to keep move_only_function
+// trivially relocatable was made because we expect move_only_function to be 
mostly used to store a functor. To only
+// forward functors there is std::function_ref (not voted in yet, expected in 
C++26).

ldionne wrote:

```suggestion
// forward functors there is C++26's std::function_ref.
```

https://github.com/llvm/llvm-project/pull/94670
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [MLIR][OpenMP] Clause-based OpenMP operation definition (PR #92523)

2024-06-20 Thread Tom Eccles via llvm-branch-commits

tblah wrote:

> I guess fixing byref is on me (#92244). Unfortunately I can't work on this 
> immediately so I won't hold up this PR for it.

@skatrak does https://github.com/llvm/llvm-project/pull/96215 cover everything 
you need?

https://github.com/llvm/llvm-project/pull/92523
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 1/8] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count(&BF))
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 2/8] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 3/8] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count(&BF))
+if (ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 4/8] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 5/8] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

dif

[llvm-branch-commits] [libcxx] Add release note for #95264 (PR #96116)

2024-06-20 Thread Louis Dionne via llvm-branch-commits

https://github.com/ldionne updated 
https://github.com/llvm/llvm-project/pull/96116

>From 4044e7c930381e5e070c7131c5b14a3dfd373259 Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Wed, 19 Jun 2024 16:50:07 -0400
Subject: [PATCH 1/2] [libc++] Add release note for #95264

---
 libcxx/docs/ReleaseNotes/18.rst | 9 +
 1 file changed, 9 insertions(+)

diff --git a/libcxx/docs/ReleaseNotes/18.rst b/libcxx/docs/ReleaseNotes/18.rst
index 7ea13e6943dd4..3e19e7c33f6af 100644
--- a/libcxx/docs/ReleaseNotes/18.rst
+++ b/libcxx/docs/ReleaseNotes/18.rst
@@ -328,6 +328,15 @@ ABI Affecting Changes
  done to fix `#70494 `_ and the vendor communication is handled
  in `#70820 `_.
 
+- LLVM 18.1.8 Fixed an issue that caused ``std::string`` to pass an incorrect size to ``allocator_traits::deallocate``
+  when deallocating memory. The impact is different depending on a few factors:
+  - Users who don't use a custom allocator in ``std::string`` and don't enable sized deallocation (which is
+off by default in Clang 18) will not be affected. This is expected to be the vast majority of users.
+  - Users who don't use a custom allocator in ``std::string`` but are enabling sized deallocation (e.g. with
+``-fsized-deallocation``) will notice that ``operator delete(void*, size_t)`` is now being passed the correct
+size. This likely has no impact if they were not customizing ``operator delete``.
+  - Users who use a custom allocator in ``std::string`` will notice that they now get passed the correct allocation
+size upon deallocation.
 
 Build System Changes
 

>From 99a23b574283752ffdc69934c32f9f825ddf10ed Mon Sep 17 00:00:00 2001
From: Louis Dionne 
Date: Thu, 20 Jun 2024 13:46:04 -0400
Subject: [PATCH 2/2] Try to fix RST syntax error

---
 libcxx/docs/ReleaseNotes/18.rst | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/libcxx/docs/ReleaseNotes/18.rst b/libcxx/docs/ReleaseNotes/18.rst
index 3e19e7c33f6af..f7c269acda8b2 100644
--- a/libcxx/docs/ReleaseNotes/18.rst
+++ b/libcxx/docs/ReleaseNotes/18.rst
@@ -330,13 +330,14 @@ ABI Affecting Changes
 
 - LLVM 18.1.8 Fixed an issue that caused ``std::string`` to pass an incorrect size to ``allocator_traits::deallocate``
   when deallocating memory. The impact is different depending on a few factors:
-  - Users who don't use a custom allocator in ``std::string`` and don't enable sized deallocation (which is
-off by default in Clang 18) will not be affected. This is expected to be the vast majority of users.
-  - Users who don't use a custom allocator in ``std::string`` but are enabling sized deallocation (e.g. with
-``-fsized-deallocation``) will notice that ``operator delete(void*, size_t)`` is now being passed the correct
-size. This likely has no impact if they were not customizing ``operator delete``.
-  - Users who use a custom allocator in ``std::string`` will notice that they now get passed the correct allocation
-size upon deallocation.
+
+- Users who don't use a custom allocator in ``std::string`` and don't enable sized deallocation (which is
+  off by default in Clang 18) will not be affected. This is expected to be the vast majority of users.
+- Users who don't use a custom allocator in ``std::string`` but are enabling sized deallocation (e.g. with
+  ``-fsized-deallocation``) will notice that ``operator delete(void*, size_t)`` is now being passed the correct
+  size. This likely has no impact if they were not customizing ``operator delete``.
+- Users who use a custom allocator in ``std::string`` will notice that they now get passed the correct allocation
+  size upon deallocation.
 
 Build System Changes
 

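
To make the scenario described in that release note concrete, here is a hedged sketch of a tracking allocator that observes the size passed to deallocate; the allocator name and the output are illustrative and are not part of the release note. With the fix, the size reported on deallocation matches the original allocation.

```cpp
#include <cstddef>
#include <cstdio>
#include <memory>
#include <string>

template <class T>
struct LoggingAlloc {
  using value_type = T;
  LoggingAlloc() = default;
  template <class U> LoggingAlloc(const LoggingAlloc<U> &) {}
  T *allocate(std::size_t n) { return std::allocator<T>{}.allocate(n); }
  void deallocate(T *p, std::size_t n) {
    std::printf("deallocate(%zu)\n", n); // with the fix, n matches the allocation size
    std::allocator<T>{}.deallocate(p, n);
  }
  friend bool operator==(const LoggingAlloc &, const LoggingAlloc &) { return true; }
  friend bool operator!=(const LoggingAlloc &, const LoggingAlloc &) { return false; }
};

int main() {
  // Long enough to force a heap allocation; deallocate runs at scope exit.
  std::basic_string<char, std::char_traits<char>, LoggingAlloc<char>> s(100, 'x');
}
```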
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread Alexander Yermolovich via llvm-branch-commits


@@ -374,15 +377,33 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 // the profile.
 Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
 
-// Recompute hash once per function.
-if (!opts::IgnoreHash)
-  Function.computeHash(YamlBP.Header.IsDFSOrder,
-   YamlBP.Header.HashFunction);
-
 if (profileMatches(YamlBF, Function))
   matchProfileToFunction(YamlBF, Function);
   }
 
+  // Uses the strict hash of profiled and binary functions to match functions
+  // that are not matched by name or common name.
+  if (!opts::IgnoreHash) {
+std::unordered_map StrictHashToBF;
+StrictHashToBF.reserve(BC.getBinaryFunctions().size());
+
+for (auto &[_, BF] : BC.getBinaryFunctions()) {

ayermolo wrote:

no need for {} since it's one line.

https://github.com/llvm/llvm-project/pull/95821
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread Alexander Yermolovich via llvm-branch-commits


@@ -374,15 +377,33 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 // the profile.
 Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
 
-// Recompute hash once per function.
-if (!opts::IgnoreHash)
-  Function.computeHash(YamlBP.Header.IsDFSOrder,
-   YamlBP.Header.HashFunction);
-
 if (profileMatches(YamlBF, Function))
   matchProfileToFunction(YamlBF, Function);
   }
 
+  // Uses the strict hash of profiled and binary functions to match functions
+  // that are not matched by name or common name.
+  if (!opts::IgnoreHash) {
+std::unordered_map StrictHashToBF;
+StrictHashToBF.reserve(BC.getBinaryFunctions().size());
+
+for (auto &[_, BF] : BC.getBinaryFunctions()) {
+  StrictHashToBF[BF.getHash()] = &BF;
+}
+
+for (auto YamlBF : YamlBP.Functions) {

ayermolo wrote:

Specify the actual type.
This can also hide a copy since & is not used.
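
For illustration, a tiny generic example of the copy being pointed out here (not BOLT code): iterating by plain auto copies each element, while const auto & binds to the existing one.

```cpp
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Functions = {"main", "foo"};
  for (auto F : Functions) {        // copies every std::string
    (void)F;
  }
  for (const auto &F : Functions) { // no copy; binds to the stored element
    (void)F;
  }
}
```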

https://github.com/llvm/llvm-project/pull/95821
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Define ptrauth_sign_constant builtin. (PR #93904)

2024-06-20 Thread Ahmed Bougacha via llvm-branch-commits

https://github.com/ahmedbougacha updated 
https://github.com/llvm/llvm-project/pull/93904

>From 20bbad26fa9f068910baf50b5abb60a0f4557564 Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha 
Date: Thu, 30 May 2024 17:33:04 -0700
Subject: [PATCH 1/6] [clang] Define ptrauth_sign_constant builtin.

This is constant-expression equivalent to __builtin_ptrauth_sign,
allowing its usage in global initializers, but requiring constant
pointers and discriminators.

Co-Authored-By: John McCall 
---
 clang/include/clang/Basic/Builtins.td |   6 +
 .../clang/Basic/DiagnosticSemaKinds.td|   7 +
 clang/include/clang/CodeGen/CodeGenABITypes.h |   6 +
 clang/lib/AST/ExprConstant.cpp|   1 +
 clang/lib/CodeGen/CGBuiltin.cpp   |   3 +
 clang/lib/CodeGen/CGExprConstant.cpp  |  62 +
 clang/lib/CodeGen/CGPointerAuth.cpp   |  77 +++
 clang/lib/CodeGen/CMakeLists.txt  |   1 +
 clang/lib/CodeGen/CodeGenModule.h |   5 +
 clang/lib/Headers/ptrauth.h   |  25 
 clang/lib/Sema/SemaChecking.cpp   | 128 --
 .../CodeGen/ptrauth-intrinsic-sign-constant.c |  20 +++
 clang/test/Sema/ptrauth-intrinsics-macro.c|   4 +
 clang/test/Sema/ptrauth.c |  28 
 14 files changed, 359 insertions(+), 14 deletions(-)
 create mode 100644 clang/lib/CodeGen/CGPointerAuth.cpp
 create mode 100644 clang/test/CodeGen/ptrauth-intrinsic-sign-constant.c

diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index e07ddf3b9b70b..9342b6bc75fc8 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4393,6 +4393,12 @@ def PtrauthSignUnauthenticated : Builtin {
   let Prototype = "void*(void*,int,void*)";
 }
 
+def PtrauthSignConstant : Builtin {
+  let Spellings = ["__builtin_ptrauth_sign_constant"];
+  let Attributes = [CustomTypeChecking, NoThrow, Const, Constexpr];
+  let Prototype = "void*(void*,int,void*)";
+}
+
 def PtrauthSignGenericData : Builtin {
   let Spellings = ["__builtin_ptrauth_sign_generic_data"];
   let Attributes = [CustomTypeChecking, NoThrow, Const];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 0900dad3c18cd..a5675879f45bc 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -924,6 +924,13 @@ def err_ptrauth_value_bad_type :
   Error<"%select{signed value|extra discriminator|blended pointer|blended "
 "integer}0 must have %select{pointer|integer|pointer or integer}1 "
 "type; type here is %2">;
+def err_ptrauth_bad_constant_pointer :
+  Error<"argument to ptrauth_sign_constant must refer to a global variable "
+"or function">;
+def err_ptrauth_bad_constant_discriminator :
+  Error<"discriminator argument to ptrauth_sign_constant must be a constant "
+"integer, the address of the global variable where the result "
+"will be stored, or a blend of the two">;
 def warn_ptrauth_sign_null_pointer :
   Warning<"signing a null pointer will yield a non-null pointer">,
   InGroup;
diff --git a/clang/include/clang/CodeGen/CodeGenABITypes.h 
b/clang/include/clang/CodeGen/CodeGenABITypes.h
index fda0855dc8683..8c62d8597ecbe 100644
--- a/clang/include/clang/CodeGen/CodeGenABITypes.h
+++ b/clang/include/clang/CodeGen/CodeGenABITypes.h
@@ -104,6 +104,12 @@ llvm::Type *convertTypeForMemory(CodeGenModule &CGM, 
QualType T);
 unsigned getLLVMFieldNumber(CodeGenModule &CGM,
 const RecordDecl *RD, const FieldDecl *FD);
 
+/// Return a signed constant pointer.
+llvm::Constant *getConstantSignedPointer(CodeGenModule &CGM,
+ llvm::Constant *pointer,
+ unsigned key,
+ llvm::Constant *storageAddress,
+ llvm::Constant *otherDiscriminator);
 /// Given the language and code-generation options that Clang was configured
 /// with, set the default LLVM IR attributes for a function definition.
 /// The attributes set here are mostly global target-configuration and
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 97b4c2080e14f..799872fe13c08 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -2042,6 +2042,7 @@ static bool IsNoOpCall(const CallExpr *E) {
   unsigned Builtin = E->getBuiltinCallee();
   return (Builtin == Builtin::BI__builtin___CFStringMakeConstantString ||
   Builtin == Builtin::BI__builtin___NSStringMakeConstantString ||
+  Builtin == Builtin::BI__builtin_ptrauth_sign_constant ||
   Builtin == Builtin::BI__builtin_function_start);
 }
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 08a89bd123d03..fd4fe1633ea29 100644
--- a/clang/li

[llvm-branch-commits] [clang] [clang] Define ptrauth_sign_constant builtin. (PR #93904)

2024-06-20 Thread Ahmed Bougacha via llvm-branch-commits


@@ -354,6 +354,23 @@ Given that ``signedPointer`` matches the layout for signed 
pointers signed with
 the given key, extract the raw pointer from it.  This operation does not trap
 and cannot fail, even if the pointer is not validly signed.
 
+``ptrauth_sign_constant``
+^
+
+.. code-block:: c
+
+  ptrauth_sign_constant(pointer, key, discriminator)
+
+Return a signed pointer for a constant address in a manner which guarantees
+a non-attackable sequence.
+
+``pointer`` must be a constant expression of pointer type which evaluates to
+a non-null pointer.  The result will have the same type as ``discriminator``.

ahmedbougacha wrote:

These two lines are just leftovers; I deleted them in 1201821dc1e4. The paragraph below is the same.
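
For readers who have not seen the builtin before, a hypothetical usage sketch based on the documentation quoted above; the symbol, key value, and discriminator below are made up and are not taken from the patch or its tests.

```cpp
// The call is a constant expression, so it can appear in a global
// initializer.  Key 0 and discriminator 42 are arbitrary illustration values.
extern int global_object;
int *signed_ptr = static_cast<int *>(
    __builtin_ptrauth_sign_constant(&global_object, 0, 42));
```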

https://github.com/llvm/llvm-project/pull/93904
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang] Implement pointer authentication for C++ virtual functions, v-tables, and VTTs (PR #94056)

2024-06-20 Thread Anton Korobeynikov via llvm-branch-commits


@@ -296,3 +296,21 @@ 
ConstantAggregateBuilderBase::finishStruct(llvm::StructType *ty) {
   buffer.erase(buffer.begin() + Begin, buffer.end());
   return constant;
 }
+

asl wrote:

Yeah, LGTM!

https://github.com/llvm/llvm-project/pull/94056
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 1/9] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count(&BF))
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 2/9] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 3/9] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count(&BF))
+if (ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 4/9] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 5/9] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

dif

[llvm-branch-commits] [llvm] [AMDGPU] Codegen support for constrained multi-dword sloads (PR #96163)

2024-06-20 Thread Stanislav Mekhanoshin via llvm-branch-commits


@@ -886,26 +977,17 @@ multiclass SMRD_Pattern  {
   def : GCNPat <
 (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
 (vt (!cast(Instr#"_SGPR") $sbase, $soffset, 0))> {
-let OtherPredicates = [isNotGFX9Plus];
-  }
-  def : GCNPat <
-(smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
-(vt (!cast(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))> {
-let OtherPredicates = [isGFX9Plus];
+let OtherPredicates = [isGFX6GFX7];
   }
 
-  // 4. SGPR+IMM offset
+  // 4. No offset
   def : GCNPat <
-(smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
-(vt (!cast(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
-let OtherPredicates = [isGFX9Plus];
+(vt (smrd_load (i64 SReg_64:$sbase))),
+(vt (!cast(Instr#"_IMM") i64:$sbase, 0, 0))> {
+let OtherPredicates = [isGFX6GFX7];
   }
 
-  // 5. No offset
-  def : GCNPat <
-(vt (smrd_load (i64 SReg_64:$sbase))),
-(vt (!cast(Instr#"_IMM") i64:$sbase, 0, 0))
-  >;
+  defm : SMRD_Align_Pattern;

rampitec wrote:

You can avoid duplicating the patterns for the aligned case; you just need to check whether xnack is on (and it is off before gfx8).
I also do not see xnack checked anywhere.

https://github.com/llvm/llvm-project/pull/96163
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 01/10] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count(&BF))
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 02/10] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 03/10] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count(&BF))
+if (ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 04/10] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 05/10] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deleti

[llvm-branch-commits] [clang] 508bc2e - Revert "[ubsan] Display correct runtime messages for negative _BitInt (#93612)"

2024-06-20 Thread via llvm-branch-commits

Author: earnol
Date: 2024-06-20T17:39:06-04:00
New Revision: 508bc2e44b59c26955e9b1226f84b4ff2cc05fee

URL: 
https://github.com/llvm/llvm-project/commit/508bc2e44b59c26955e9b1226f84b4ff2cc05fee
DIFF: 
https://github.com/llvm/llvm-project/commit/508bc2e44b59c26955e9b1226f84b4ff2cc05fee.diff

LOG: Revert "[ubsan] Display correct runtime messages for negative _BitInt 
(#93612)"

This reverts commit 49001d584c69726eb7b5069468c5216da3fc3263.

Added: 


Modified: 
clang/lib/CodeGen/CGExpr.cpp
compiler-rt/lib/ubsan/ubsan_value.cpp
compiler-rt/lib/ubsan/ubsan_value.h

Removed: 
compiler-rt/test/ubsan/TestCases/Integer/bit-int-pass.c
compiler-rt/test/ubsan/TestCases/Integer/bit-int.c



diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 55b2b4337fab8..3dfe5e09c778d 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -41,7 +41,6 @@
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Passes/OptimizationLevel.h"
 #include "llvm/Support/ConvertUTF.h"
-#include "llvm/Support/Endian.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SaveAndRestore.h"
@@ -65,22 +64,6 @@ static llvm::cl::opt ClSanitizeGuardChecks(
 "ubsan-guard-checks", llvm::cl::Optional,
 llvm::cl::desc("Guard UBSAN checks with `llvm.allow.ubsan.check()`."));
 
-//======//
-//Defines for metadata
-//======//
-
-// Those values are crucial to be the SAME as in ubsan runtime library.
-enum VariableTypeDescriptorKind : uint16_t {
-  /// An integer type.
-  TK_Integer = 0x,
-  /// A floating-point type.
-  TK_Float = 0x0001,
-  /// An _BitInt(N) type.
-  TK_BitInt = 0x0002,
-  /// Any other type. The value representation is unspecified.
-  TK_Unknown = 0x
-};
-
 //======//
 //Miscellaneous Helper Methods
 //======//
@@ -3315,40 +3298,22 @@ LValue CodeGenFunction::EmitPredefinedLValue(const 
PredefinedExpr *E) {
 ///   { i16 TypeKind, i16 TypeInfo }
 /// \endcode
 ///
-/// followed by an array of i8 containing the type name with extra information
-/// for BitInt. TypeKind is TK_Integer(0) for an integer, TK_Float(1) for a
-/// floating point value, TK_BitInt(2) for BitInt and TK_Unknown(0x) for
-/// anything else.
+/// followed by an array of i8 containing the type name. TypeKind is 0 for an
+/// integer, 1 for a floating point value, and -1 for anything else.
 llvm::Constant *CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
   // Only emit each type's descriptor once.
   if (llvm::Constant *C = CGM.getTypeDescriptorFromMap(T))
 return C;
 
-  uint16_t TypeKind = TK_Unknown;
+  uint16_t TypeKind = -1;
   uint16_t TypeInfo = 0;
-  bool IsBitInt = false;
 
   if (T->isIntegerType()) {
-TypeKind = TK_Integer;
+TypeKind = 0;
 TypeInfo = (llvm::Log2_32(getContext().getTypeSize(T)) << 1) |
(T->isSignedIntegerType() ? 1 : 0);
-// Follow suggestion from https://github.com/llvm/llvm-project/issues/64100
-// So we can write the exact amount of bits in TypeName after '\0'
-// making it .'\0'.<32-bit width>.
-if (T->isSignedIntegerType() && T->getAs()) {
-  // Do a sanity checks as we are using 32-bit type to store bit length.
-  assert((getContext().getTypeSize(T) > 0) &&
- " non positive amount of bits in __BitInt type");
-  assert((getContext().getTypeSize(T) <= 0x) &&
- " too many bits in __BitInt type");
-
-  // Redefine TypeKind with the actual __BitInt type if we have signed
-  // BitInt.
-  TypeKind = TK_BitInt;
-  IsBitInt = true;
-}
   } else if (T->isFloatingType()) {
-TypeKind = TK_Float;
+TypeKind = 1;
 TypeInfo = getContext().getTypeSize(T);
   }
 
@@ -3359,20 +3324,6 @@ llvm::Constant 
*CodeGenFunction::EmitCheckTypeDescriptor(QualType T) {
   DiagnosticsEngine::ak_qualtype, (intptr_t)T.getAsOpaquePtr(), 
StringRef(),
   StringRef(), std::nullopt, Buffer, std::nullopt);
 
-  if (IsBitInt) {
-// The Structure is: 0 to end the string, 32 bit unsigned integer in target
-// endianness, zero.
-char S[6] = {'\0', '\0', '\0', '\0', '\0', '\0'};
-const auto *EIT = T->castAs();
-uint32_t Bits = EIT->getNumBits();
-llvm::support::endian::write32(S + 1, Bits,
-   getTarget().isBigEndian()
-   ? llvm::endianness::big
-   : llvm::endianness::little);
-StringRef str = StringRef(S, sizeof(S) / sizeof(decltype(S[0])));
-Buffer.append(str);
-  }
-
   llvm::Constant *

[llvm-branch-commits] [llvm] [AMDGPU][SILoadStoreOptimizer] Merge constrained sloads (PR #96162)

2024-06-20 Thread Stanislav Mekhanoshin via llvm-branch-commits


@@ -1701,17 +1732,33 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const 
CombineInfo &CI,
   return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
 }
   case S_LOAD_IMM:
-switch (Width) {
-default:
-  return 0;
-case 2:
-  return AMDGPU::S_LOAD_DWORDX2_IMM;
-case 3:
-  return AMDGPU::S_LOAD_DWORDX3_IMM;
-case 4:
-  return AMDGPU::S_LOAD_DWORDX4_IMM;
-case 8:
-  return AMDGPU::S_LOAD_DWORDX8_IMM;
+// For targets that support XNACK replay, use the constrained load opcode.
+if (STI && STI->hasXnackReplay()) {
+  switch (Width) {

rampitec wrote:

You can check the alignment on the first load if an MMO is available and avoid producing the _ec version if it is sufficient.
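
A rough sketch of that check, assuming the merged load carries a single MachineMemOperand; the helper name and signature are illustrative and are not actual SILoadStoreOptimizer code.

```cpp
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include <cstdint>

// Returns true when the load's memory operand already guarantees at least
// RequiredBytes of alignment, so the unconstrained (non-_ec) opcode is safe.
static bool hasSufficientAlign(const llvm::MachineInstr &MI,
                               std::uint64_t RequiredBytes) {
  if (!MI.hasOneMemOperand())
    return false; // no MMO: stay conservative and keep the constrained form
  return (*MI.memoperands_begin())->getAlign().value() >= RequiredBytes;
}
```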

https://github.com/llvm/llvm-project/pull/96162
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 01/11] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count(&BF))
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 02/11] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 03/11] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count(&BF))
+if (ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 04/11] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 05/11] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deleti

[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 01/12] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count(&BF))
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 02/12] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 03/12] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count(&BF))
+if (ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 04/12] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 05/12] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deleti

[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 01/13] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count(&BF))
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 02/13] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 03/13] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count(&BF))
+if (ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 04/13] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 05/13] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deleti

[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung edited 
https://github.com/llvm/llvm-project/pull/95821
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Hash-based function matching (PR #95821)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95821

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 01/14] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+if (!ProfiledFunctions.count(&BF))
+  continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 02/14] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map StrictBinaryFunctionHashes;
+  std::unordered_map StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
 if (!ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   for (auto YamlBF : YamlBP.Functions) {
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-if (It != StrictBinaryFunctionHashes.end() && 
!ProfiledFunctions.count(It->second)) {
+if (It != StrictBinaryFunctionHashes.end() &&
+!ProfiledFunctions.count(It->second)) {
   auto *BF = It->second;
   matchProfileToFunction(YamlBF, *BF);
 }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 03/14] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp   | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-if (!ProfiledFunctions.count(&BF))
+if (ProfiledFunctions.count(&BF))
   continue;
 StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test 
b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:main
 fid: 0
-hash:0x
+hash:0x0001
 exec:1
 nblocks: 6
 blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 04/14] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+if (YamlBF.Used)
+  continue;
 auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
 if (It != StrictBinaryFunctionHashes.end() &&
 !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 05/14] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deleti

[llvm-branch-commits] [compiler-rt] 4912d94 - Revert "mmapForContinuousMode: Align Linux's impl to __APPLE__'s more. NFC. (…"

2024-06-20 Thread via llvm-branch-commits

Author: NAKAMURA Takumi
Date: 2024-06-21T07:51:40+09:00
New Revision: 4912d941c1ead2299cbccee91aabe317a64b7c8a

URL: 
https://github.com/llvm/llvm-project/commit/4912d941c1ead2299cbccee91aabe317a64b7c8a
DIFF: 
https://github.com/llvm/llvm-project/commit/4912d941c1ead2299cbccee91aabe317a64b7c8a.diff

LOG: Revert "mmapForContinuousMode: Align Linux's impl to __APPLE__'s more. 
NFC. (…"

This reverts commit 7cf84d3b0bc5eda3a907dfd026d51a17e28114a3.

Added: 


Modified: 
compiler-rt/lib/profile/InstrProfilingFile.c

Removed: 




diff  --git a/compiler-rt/lib/profile/InstrProfilingFile.c 
b/compiler-rt/lib/profile/InstrProfilingFile.c
index 9faee36e5b815..b88e0b4b0b2ab 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -237,46 +237,24 @@ static int mmapForContinuousMode(uint64_t 
CurrentFileOffset, FILE *File) {
   const char *CountersEnd = __llvm_profile_end_counters();
   const char *BitmapBegin = __llvm_profile_begin_bitmap();
   const char *BitmapEnd = __llvm_profile_end_bitmap();
-  const char *NamesBegin = __llvm_profile_begin_names();
-  const char *NamesEnd = __llvm_profile_end_names();
-  const uint64_t NamesSize = (NamesEnd - NamesBegin) * sizeof(char);
   uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd);
-  uint64_t CountersSize =
-  __llvm_profile_get_counters_size(CountersBegin, CountersEnd);
-  uint64_t NumBitmapBytes =
-  __llvm_profile_get_num_bitmap_bytes(BitmapBegin, BitmapEnd);
   /* Get the file size. */
   uint64_t FileSize = 0;
   if (getProfileFileSizeForMerging(File, &FileSize))
 return 1;
 
-  int Fileno = fileno(File);
-  /* Determine how much padding is needed before/after the counters and
-   * after the names. */
-  uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
-  PaddingBytesAfterNames, PaddingBytesAfterBitmapBytes,
-  PaddingBytesAfterVTable, PaddingBytesAfterVNames;
-  __llvm_profile_get_padding_sizes_for_counters(
-  DataSize, CountersSize, NumBitmapBytes, NamesSize, /*VTableSize=*/0,
-  /*VNameSize=*/0, &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters,
-  &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames,
-  &PaddingBytesAfterVTable, &PaddingBytesAfterVNames);
-
-  CurrentFileOffset = 0;
-  uint64_t FileOffsetToCounters = CurrentFileOffset +
-  sizeof(__llvm_profile_header) + DataSize +
-  PaddingBytesBeforeCounters;
-
   /* Map the profile. */
   char *Profile = (char *)mmap(NULL, FileSize, PROT_READ | PROT_WRITE,
-   MAP_SHARED, Fileno, 0);
+   MAP_SHARED, fileno(File), 0);
   if (Profile == MAP_FAILED) {
 PROF_ERR("Unable to mmap profile: %s\n", strerror(errno));
 return 1;
   }
+  const uint64_t CountersOffsetInBiasMode =
+  sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) + DataSize;
   /* Update the profile fields based on the current mapping. */
   INSTR_PROF_PROFILE_COUNTER_BIAS_VAR =
-  (intptr_t)Profile - (uintptr_t)CountersBegin + FileOffsetToCounters;
+  (intptr_t)Profile - (uintptr_t)CountersBegin + CountersOffsetInBiasMode;
 
   /* Return the memory allocated for counters to OS. */
   lprofReleaseMemoryPagesToOS((uintptr_t)CountersBegin, 
(uintptr_t)CountersEnd);



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lldb] b9af881 - Revert "[lldb/crashlog] Make interactive mode the new default (#94575)"

2024-06-20 Thread via llvm-branch-commits

Author: Med Ismail Bennani
Date: 2024-06-20T18:19:26-07:00
New Revision: b9af881866b3702be5d5bf55f694d4eb051e2872

URL: 
https://github.com/llvm/llvm-project/commit/b9af881866b3702be5d5bf55f694d4eb051e2872
DIFF: 
https://github.com/llvm/llvm-project/commit/b9af881866b3702be5d5bf55f694d4eb051e2872.diff

LOG: Revert "[lldb/crashlog] Make interactive mode the new default (#94575)"

This reverts commit aafa0ef900791857f55629bcf61c37f53cc0d2af.

Added: 


Modified: 
lldb/examples/python/crashlog.py
lldb/test/Shell/ScriptInterpreter/Python/Crashlog/altered_threadState.test
lldb/test/Shell/ScriptInterpreter/Python/Crashlog/json.test
lldb/test/Shell/ScriptInterpreter/Python/Crashlog/no_threadState.test

lldb/test/Shell/ScriptInterpreter/Python/Crashlog/skipped_status_interactive_crashlog.test
lldb/test/Shell/ScriptInterpreter/Python/Crashlog/text.test

Removed: 




diff  --git a/lldb/examples/python/crashlog.py 
b/lldb/examples/python/crashlog.py
index d3952e377c657..1c0d717ce455c 100755
--- a/lldb/examples/python/crashlog.py
+++ b/lldb/examples/python/crashlog.py
@@ -31,7 +31,6 @@
 import concurrent.futures
 import contextlib
 import datetime
-import enum
 import json
 import os
 import platform
@@ -46,6 +45,7 @@
 import time
 import uuid
 
+
 print_lock = threading.RLock()
 
 try:
@@ -1582,12 +1582,9 @@ def synchronous(debugger):
 debugger.RunCommandInterpreter(True, False, run_options, 0, 
False, True)
 
 
-class CrashLogLoadingMode(str, enum.Enum):
-batch = "batch"
-interactive = "interactive"
-
-
-def CreateSymbolicateCrashLogOptions(command_name, description):
+def CreateSymbolicateCrashLogOptions(
+command_name, description, add_interactive_options
+):
 usage = "crashlog [options]  [FILE ...]"
 arg_parser = argparse.ArgumentParser(
 description=description,
@@ -1603,12 +1600,6 @@ def CreateSymbolicateCrashLogOptions(command_name, 
description):
 help="crash report(s) to symbolicate",
 )
 
-arg_parser.add_argument(
-"-m",
-"--mode",
-choices=[mode.value for mode in CrashLogLoadingMode],
-help="change how the symbolicated process and threads are displayed to 
the user (default: interactive)",
-)
 arg_parser.add_argument(
 "--version",
 "-V",
@@ -1745,35 +1736,36 @@ def CreateSymbolicateCrashLogOptions(command_name, 
description):
 help=argparse.SUPPRESS,
 default=False,
 )
-arg_parser.add_argument(
-"--target",
-"-t",
-dest="target_path",
-help="the target binary path that should be used for interactive 
crashlog (optional)",
-default=None,
-)
-arg_parser.add_argument(
-"--skip-status",
-"-s",
-dest="skip_status",
-action="store_true",
-help="prevent the interactive crashlog to dump the process status and 
thread backtrace at launch",
-default=False,
-)
-legacy_group = arg_parser.add_mutually_exclusive_group()
-legacy_group.add_argument(
-"-i",
-"--interactive",
-action="store_true",
-help=argparse.SUPPRESS,
-)
-legacy_group.add_argument(
-"-b",
-"--batch",
-action="store_true",
-help=argparse.SUPPRESS,
-)
-
+if add_interactive_options:
+arg_parser.add_argument(
+"-i",
+"--interactive",
+action="store_true",
+help="parse a crash log and load it in a ScriptedProcess",
+default=False,
+)
+arg_parser.add_argument(
+"-b",
+"--batch",
+action="store_true",
+help="dump symbolicated stackframes without creating a debug 
session",
+default=True,
+)
+arg_parser.add_argument(
+"--target",
+"-t",
+dest="target_path",
+help="the target binary path that should be used for interactive 
crashlog (optional)",
+default=None,
+)
+arg_parser.add_argument(
+"--skip-status",
+"-s",
+dest="skip_status",
+action="store_true",
+help="prevent the interactive crashlog to dump the process status 
and thread backtrace at launch",
+default=False,
+)
 return arg_parser
 
 
@@ -1786,7 +1778,7 @@ def CrashLogOptionParser():
 created that has all of the shared libraries loaded at the load addresses 
found in the crash log file. This allows
 you to explore the program as if it were stopped at the locations described in 
the crash log and functions can
 be disassembled and lookups can be performed using the addresses found in the 
crash log."""
-return CreateSymbolicateCrashLogOptions("crashlog", description)
+return CreateSymbolicateCrashLogOptions("crashlog", description, True)
 
 
 def

[llvm-branch-commits] [clang] 03921b9 - [serialization] No transitive type change (#92511)

2024-06-20 Thread via llvm-branch-commits

Author: Chuanqi Xu
Date: 2024-06-21T09:21:40+08:00
New Revision: 03921b979d67657bfc9cf8240add2484cc4df6a7

URL: 
https://github.com/llvm/llvm-project/commit/03921b979d67657bfc9cf8240add2484cc4df6a7
DIFF: 
https://github.com/llvm/llvm-project/commit/03921b979d67657bfc9cf8240add2484cc4df6a7.diff

LOG: [serialization] No transitive type change (#92511)

Follow-up of https://github.com/llvm/llvm-project/pull/92085.

Motivation

The motivation is still cutting off unnecessary changes in the
dependency chain. See the above link (recursively) for details.
And this will be the last patch of the `no-transitive-*-change` series.
If there are any following patches, they might be C++20 Named modules
specific to handle special grammars like `ADL` (See the reply in
https://discourse.llvm.org/t/rfc-c-20-modules-introduce-thin-bmi-and-decls-hash/74755/53
for example). So they won't affect the whole serialization part as the
series patch did.

Example

After this patch, we are finally able to cut off unnecessary changes of
types. For example,

```

//--- m-partA.cppm
export module m:partA;

//--- m-partA.v1.cppm
export module m:partA;

namespace NS {
class A {
public:
int getValue() {
return 43;
}
};
}

//--- m-partB.cppm
export module m:partB;

export inline int getB() {
return 430;
}

//--- m.cppm
export module m;
export import :partA;
export import :partB;

//--- useBOnly.cppm
export module useBOnly;
import m;

export inline int get() {
return getB();
}
```

The BMI of `useBOnly.cppm` is expected to not change if we only add a
new class in `m:partA`. This will be pretty useful in practice.

Implementation details

The key idea of this patch is similar to the previous patches: extend
the 32-bit type ID to 64 bits so that we can store the module file index
in the higher bits. Then the encoding of the type ID is independent of
the imported modules.

But there are two differences from the previous patches:
- TypeID is not purely an index of serialized types. We use the
lower 3 bits to store the qualifiers.
- TypeID won't take part in any lookup process, so TypeID is used
much less than in the previous patches.

The first difference means we need slightly more complex bit
operations. The second difference makes the patch much simpler than
the previous ones.
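
To make the layout concrete, here is an illustrative standalone sketch of
the packing described above. The helper names are made up and are not the
clang serialization API; the real representation is the TypeIdx change shown
in the diff below:

```cpp
#include <cstdint>

// Upper 32 bits: module file index; next 29 bits: type index within that
// module file; lowest 3 bits: fast qualifiers (as with QualType).
constexpr uint64_t packTypeID(uint32_t ModuleFileIndex, uint32_t TypeIndex,
                              uint32_t FastQuals) {
  return (uint64_t(ModuleFileIndex) << 32) | (uint64_t(TypeIndex) << 3) |
         (FastQuals & 0x7);
}

constexpr uint32_t moduleFileIndex(uint64_t ID) { return ID >> 32; }
constexpr uint32_t typeIndex(uint64_t ID) {
  return (ID & 0xFFFFFFFFu) >> 3; // drop the qualifier bits
}
constexpr uint32_t fastQuals(uint64_t ID) { return ID & 0x7; }
```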

Added: 
clang/test/Modules/no-transitive-type-change.cppm

Modified: 
clang/include/clang/Serialization/ASTBitCodes.h
clang/include/clang/Serialization/ASTReader.h
clang/include/clang/Serialization/ASTRecordReader.h
clang/include/clang/Serialization/ModuleFile.h
clang/lib/Serialization/ASTCommon.cpp
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTWriter.cpp
clang/lib/Serialization/ModuleFile.cpp
clang/test/Modules/no-transitive-decls-change.cppm
clang/test/Modules/no-transitive-identifier-change.cppm

Removed: 




diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index 316350d779e90..38502a23f805e 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -26,6 +26,7 @@
 #include "clang/Serialization/SourceLocationEncoding.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Support/MathExtras.h"
 #include 
 #include 
 
@@ -70,41 +71,64 @@ using DeclID = DeclIDBase::DeclID;
 
 /// An ID number that refers to a type in an AST file.
 ///
-/// The ID of a type is partitioned into two parts: the lower
-/// three bits are used to store the const/volatile/restrict
-/// qualifiers (as with QualType) and the upper bits provide a
-/// type index. The type index values are partitioned into two
+/// The ID of a type is partitioned into three parts:
+/// - the lower three bits are used to store the const/volatile/restrict
+///   qualifiers (as with QualType).
+/// - the next 29 bits provide a type index in the corresponding
+///   module file.
+/// - the upper 32 bits provide a module file index.
+///
+/// The type index values are partitioned into two
 /// sets. The values below NUM_PREDEF_TYPE_IDs are predefined type
 /// IDs (based on the PREDEF_TYPE_*_ID constants), with 0 as a
-/// placeholder for "no type". Values from NUM_PREDEF_TYPE_IDs are
-/// other types that have serialized representations.
-using TypeID = uint32_t;
+/// placeholder for "no type". The module file index for predefined
+/// types are always 0 since they don't belong to any modules.
+/// Values from NUM_PREDEF_TYPE_IDs are other types that have
+/// serialized representations.
+using TypeID = uint64_t;
+/// Same with TypeID except that the LocalTypeID is only meaningful
+/// with the corresponding ModuleFile.
+///
+/// FIXME: Make TypeID and LocalTypeID a class to improve the type
+/// safety.
+using LocalT

[llvm-branch-commits] [clang] 5fbb65e - Merge branch 'main' into revert-94575-crashlog-default-interactive-mode

2024-06-20 Thread via llvm-branch-commits

Author: Med Ismail Bennani
Date: 2024-06-20T18:23:18-07:00
New Revision: 5fbb65ed892418ef52fb9903b91302210ece8ce9

URL: 
https://github.com/llvm/llvm-project/commit/5fbb65ed892418ef52fb9903b91302210ece8ce9
DIFF: 
https://github.com/llvm/llvm-project/commit/5fbb65ed892418ef52fb9903b91302210ece8ce9.diff

LOG: Merge branch 'main' into revert-94575-crashlog-default-interactive-mode

Added: 
clang/test/Modules/no-transitive-type-change.cppm

Modified: 
clang/include/clang/Serialization/ASTBitCodes.h
clang/include/clang/Serialization/ASTReader.h
clang/include/clang/Serialization/ASTRecordReader.h
clang/include/clang/Serialization/ModuleFile.h
clang/lib/Serialization/ASTCommon.cpp
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTWriter.cpp
clang/lib/Serialization/ModuleFile.cpp
clang/test/Modules/no-transitive-decls-change.cppm
clang/test/Modules/no-transitive-identifier-change.cppm

Removed: 




diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index 316350d779e90..38502a23f805e 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -26,6 +26,7 @@
 #include "clang/Serialization/SourceLocationEncoding.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Bitstream/BitCodes.h"
+#include "llvm/Support/MathExtras.h"
 #include 
 #include 
 
@@ -70,41 +71,64 @@ using DeclID = DeclIDBase::DeclID;
 
 /// An ID number that refers to a type in an AST file.
 ///
-/// The ID of a type is partitioned into two parts: the lower
-/// three bits are used to store the const/volatile/restrict
-/// qualifiers (as with QualType) and the upper bits provide a
-/// type index. The type index values are partitioned into two
+/// The ID of a type is partitioned into three parts:
+/// - the lower three bits are used to store the const/volatile/restrict
+///   qualifiers (as with QualType).
+/// - the next 29 bits provide a type index in the corresponding
+///   module file.
+/// - the upper 32 bits provide a module file index.
+///
+/// The type index values are partitioned into two
 /// sets. The values below NUM_PREDEF_TYPE_IDs are predefined type
 /// IDs (based on the PREDEF_TYPE_*_ID constants), with 0 as a
-/// placeholder for "no type". Values from NUM_PREDEF_TYPE_IDs are
-/// other types that have serialized representations.
-using TypeID = uint32_t;
+/// placeholder for "no type". The module file index for predefined
+/// types are always 0 since they don't belong to any modules.
+/// Values from NUM_PREDEF_TYPE_IDs are other types that have
+/// serialized representations.
+using TypeID = uint64_t;
+/// Same with TypeID except that the LocalTypeID is only meaningful
+/// with the corresponding ModuleFile.
+///
+/// FIXME: Make TypeID and LocalTypeID a class to improve the type
+/// safety.
+using LocalTypeID = TypeID;
 
 /// A type index; the type ID with the qualifier bits removed.
+/// Keep structure alignment 32-bit since the blob is assumed as 32-bit
+/// aligned.
 class TypeIdx {
+  uint32_t ModuleFileIndex = 0;
   uint32_t Idx = 0;
 
 public:
   TypeIdx() = default;
-  explicit TypeIdx(uint32_t index) : Idx(index) {}
 
-  uint32_t getIndex() const { return Idx; }
+  explicit TypeIdx(uint32_t ModuleFileIdx, uint32_t Idx)
+  : ModuleFileIndex(ModuleFileIdx), Idx(Idx) {}
+
+  uint32_t getModuleFileIndex() const { return ModuleFileIndex; }
+
+  uint64_t getValue() const { return ((uint64_t)ModuleFileIndex << 32) | Idx; }
 
   TypeID asTypeID(unsigned FastQuals) const {
 if (Idx == uint32_t(-1))
   return TypeID(-1);
 
-return (Idx << Qualifiers::FastWidth) | FastQuals;
+unsigned Index = (Idx << Qualifiers::FastWidth) | FastQuals;
+return ((uint64_t)ModuleFileIndex << 32) | Index;
   }
 
   static TypeIdx fromTypeID(TypeID ID) {
 if (ID == TypeID(-1))
-  return TypeIdx(-1);
+  return TypeIdx(0, -1);
 
-return TypeIdx(ID >> Qualifiers::FastWidth);
+return TypeIdx(ID >> 32, (ID & llvm::maskTrailingOnes(32)) >>
+ Qualifiers::FastWidth);
   }
 };
 
+static_assert(alignof(TypeIdx) == 4);
+
 /// A structure for putting "fast"-unqualified QualTypes into a
 /// DenseMap.  This uses the standard pointer hash function.
 struct UnsafeQualTypeDenseMapInfo {

diff  --git a/clang/include/clang/Serialization/ASTReader.h 
b/clang/include/clang/Serialization/ASTReader.h
index 0e95d82928459..f41c473c97cd9 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -491,14 +491,6 @@ class ASTReader
   /// ID = (I + 1) << FastQual::Width has already been loaded
   llvm::PagedVector TypesLoaded;
 
-  using GlobalTypeMapType =
-  ContinuousRangeMap;
-
-  /// Mapping from global type IDs to the module in which the
-  /// type resides along with the of

[llvm-branch-commits] [OpenMP][MLIR] Descriptor explicit member map lowering changes (PR #96265)

2024-06-20 Thread via llvm-branch-commits

https://github.com/agozillon created 
https://github.com/llvm/llvm-project/pull/96265

This is one of 3 PRs in a PR stack that aims to add support for explicit 
mapping of
allocatable members in derived types.

The primary changes in this PR are the OpenMPToLLVMIRTranslation.cpp changes,
which are small and seek to alter the current member mapping to add an
additional map insertion for pointers. Effectively, if the member is a pointer
(currently indicated by having a varPtrPtr field) we add an additional map for
the pointer and then alter the subsequent mapping of the member (the data)
to utilise the member rather than the parent's base pointer. This appears to be
necessary in certain cases when mapping pointer data within record types to
avoid segfaulting on device (due to incorrect data mapping). In general this
record type mapping may be simplifiable in the future.

There are also additional tests which should help to showcase the effect
of the changes above.
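
For illustration only, here is a rough standalone sketch of the strategy
described above. The struct and helper below are hypothetical
simplifications, not the OpenMPToLLVMIRTranslation code; the actual change
appears in the diff quoted in the follow-up subscriber message:

```cpp
#include <cstdint>
#include <vector>

// Each entry loosely corresponds to one base-pointer/pointer/size triple in
// the generated offload map arrays.
struct MapEntry {
  void *Base;
  void *Ptr;
  uint64_t Size;
};

// If the mapped member is itself a pointer, emit an extra entry for the
// pointer member first, then base the data entry on the member rather than
// on the parent's base pointer.
void mapMember(std::vector<MapEntry> &Entries, void *ParentBase,
               void *MemberPtr, void *MemberData, uint64_t DataSize,
               bool MemberIsPointer) {
  if (MemberIsPointer)
    Entries.push_back({ParentBase, MemberPtr, sizeof(void *)});
  void *Base = MemberIsPointer ? MemberPtr : ParentBase;
  Entries.push_back({Base, MemberData, DataSize});
}
```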



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Flang][OpenMP] Derived type explicit allocatable member mapping (PR #96266)

2024-06-20 Thread via llvm-branch-commits

https://github.com/agozillon created 
https://github.com/llvm/llvm-project/pull/96266

This PR is one of 3 in a PR stack, this is the primary change set which seeks
to extend the current derived type explicit member mapping support to
handle descriptor member mapping at arbitrary levels of nesting. The PR
stack seems to do this reasonably (from testing so far) but as you can
create quite complex mappings with derived types (in particular when adding
allocatable derived types or arrays of allocatable derived types) I imagine
there will be hiccups, which I am more than happy to address. There will
also be further extensions to this work to handle the implicit auto-magical
mapping of descriptor members in derived types and a few other changes
planned for the future (with some ideas on optimizing things).

The changes in this PR primarily occur in the OpenMP lowering and
the OMPMapInfoFinalization pass.

In the OpenMP lowering several utility functions were added or extended
to support the generation of appropriate intermediate member mappings
which are currently required when the parent (or multiple parents) of a
mapped member are descriptor types. We need to map the entirety of
these types or do a "deep copy" for lack of a better term, where we map
both the base address and the descriptor: without copying both of these,
we lack the information in the descriptor needed to access the
member or attach the pointer's data to the pointer, and in the latter case we
require the base address to map the chunk of data. Currently we do not
segment descriptor based derived types as we do with regular
non-descriptor derived types, we effectively map their entirety in all
cases at the moment, I hope to address this at some point in the future as
it adds a fair bit of a performance penalty to having nestings of allocatable
derived types as an example. The process of mapping all intermediate
descriptor members in a member's path only occurs if a member has
an allocatable or object parent in its symbol path or the member itself
is a member or allocatable. This occurs in the
createParentSymAndGenIntermediateMaps function, which will also
generate the appropriate address for the allocatable member
within the derived type to use as the varPtr field of the map (for
intermediate allocatable maps and final allocatable mappings). In
this case it's necessary as we can't utilise the usual Fortran::lower
functionality such as gatherDataOperandAddrAndBounds without
causing issues later in the lowering due to extra allocas being spawned
which seem to affect the pointer attachment (at least this is my
current assumption, it results in memory access errors on the device
due to incorrect map information generation). This is similar
to why we do not use the MLIR value generated for this and utilise
the original symbol provided when mapping descriptor types external
to derived types. Hopefully this can be rectified in the future so this
function can be simplified and more closely aligned to the other type
mappings. We also make use of fir::CoordinateOp as opposed to the
HLFIR version as the HLFIR version doesn't support the appropriate
lowering to FIR necessary at the moment, we also cannot use a
single CoordinateOp (similarly to a single GEP) as when we index
through a descriptor operation (BoxType) we encounter issues later
in the lowering, however in either case we need access to intermediate
descriptors so individual CoordinateOp's aid this (although, being
able to compress them into a smaller amount of CoordinateOp's may
simplify the IR and perhaps result in a better end product, something
to consider for the future).

The other large change area was in the OMPMapInfoFinalization pass,
where the pass had to be extended to support the expansion of box
types (or multiple nestings of box types) within derived types, or box
type derived types. This requires expanding each BoxType mapping
from one into two maps and then modifying all of the existing
member indices of the overarching parent mapping to account for
the addition of these new members alongside adjusting the existing
member indices to support the addition of these new maps which
extend the original member indices (as a base address of a box type
is currently considered a member of the box type at a position of
0 as when lowered to LLVM-IR it's a pointer contained at this position
in the descriptor type, however, this means extending mapped children
of this expanded descriptor type to additionally incorporate the new
member index in the correct location in its own index list). I believe
there is a reasonable amount of comments that should aid in
understanding this better, alongside the test alterations for the pass.
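
As a purely illustrative sketch of the index adjustment mentioned above
(the helper below is hypothetical and not the OMPMapInfoFinalization API;
exactly where the new index is spliced in depends on the member's position
in the parent mapping):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Once a box-typed member at depth BoxDepth is expanded into descriptor and
// base-address maps, a child whose member-index path passes through that box
// gains an extra index 0 right after the box's position, since the base
// address is treated as member 0 of the lowered descriptor type.
std::vector<int64_t> extendPathThroughBox(std::vector<int64_t> Path,
                                          std::size_t BoxDepth) {
  Path.insert(Path.begin() + static_cast<std::ptrdiff_t>(BoxDepth) + 1, 0);
  return Path;
}
```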

A subset of the changes were also aimed at making some of the utilities
for packing and unpacking the DenseIntElementsAttr
containing the member indices shareable across the lowering and
OMPMapInfoFinalization, this required moving some functions to the
Low

[llvm-branch-commits] [OpenMP][MLIR] Descriptor explicit member map lowering changes (PR #96265)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-mlir-openmp

Author: None (agozillon)


Changes

This is one of 3 PRs in a PR stack that aims to add support for explicit 
mapping of
allocatable members in derived types.

The primary changes in this PR are the OpenMPToLLVMIRTranslation.cpp changes,
which are small and seek to alter the current member mapping to add an
additional map insertion for pointers. Effectively, if the member is a pointer
(currently indicated by having a varPtrPtr field) we add an additional map for
the pointer and then alter the subsequent mapping of the member (the data)
to utilise the member rather than the parent's base pointer. This appears to be
necessary in certain cases when mapping pointer data within record types to
avoid segfaulting on device (due to incorrect data mapping). In general this
record type mapping may be simplifiable in the future.

There are also additional tests which should help to showcase the effect
of the changes above.


---

Patch is 46.51 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/96265.diff


3 Files Affected:

- (modified) 
mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+31-3) 
- (added) 
mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
 (+333) 
- (modified) 
mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir (+41-31) 


``diff
diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index cbfc64972f38b..a85dd6aafedad 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2245,10 +2245,13 @@ static llvm::omp::OpenMPOffloadMappingFlags 
mapParentWithMembers(
 mlir::dyn_cast(mapData.MapClause[mapDataIndex]);
 int firstMemberIdx = getMapDataMemberIdx(
 mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
-lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
-builder.getPtrTy());
 int lastMemberIdx = getMapDataMemberIdx(
 mapData, getFirstOrLastMappedMemberPtr(mapOp, false));
+
+// NOTE/TODO: Should perhaps use OriginalValue here instead of Pointers to
+// avoid offset or any manipulations interfering with the calculation.
+lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
+builder.getPtrTy());
 highAddr = builder.CreatePointerCast(
 builder.CreateGEP(mapData.BaseType[lastMemberIdx],
   mapData.Pointers[lastMemberIdx], 
builder.getInt64(1)),
@@ -2331,6 +2334,24 @@ static void processMapMembersWithParent(
 
 assert(memberDataIdx >= 0 && "could not find mapped member of structure");
 
+if (checkIfPointerMap(memberClause)) {
+  auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
+  memberClause.getMapType().value());
+  mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+  mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
+  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
+  combinedInfo.Types.emplace_back(mapFlag);
+  combinedInfo.DevicePointers.emplace_back(
+  llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+  combinedInfo.Names.emplace_back(
+  LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
+  combinedInfo.BasePointers.emplace_back(
+  mapData.BasePointers[mapDataIndex]);
+  combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
+  combinedInfo.Sizes.emplace_back(builder.getInt64(
+  
moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
+}
+
 // Same MemberOfFlag to indicate its link with parent and other members
 // of.
 auto mapFlag =
@@ -2346,7 +2367,14 @@ static void processMapMembersWithParent(
 llvm::OpenMPIRBuilder::DeviceInfoTy::None);
 combinedInfo.Names.emplace_back(
 LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
-combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
+
+if (checkIfPointerMap(memberClause))
+  combinedInfo.BasePointers.emplace_back(
+  mapData.BasePointers[memberDataIdx]);
+else
+  combinedInfo.BasePointers.emplace_back(
+  mapData.BasePointers[mapDataIndex]);
+
 combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
 combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
   }
diff --git 
a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
 
b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
new file mode 100644
index 0..e36caefe9afc4
--- /dev/null
+++ 
b/mlir/test/Target/

[llvm-branch-commits] [Flang][OpenMP] Derived type explicit allocatable member mapping (PR #96266)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: None (agozillon)


Changes

This PR is one of 3 in a PR stack, this is the primary change set which seeks
to extend the current derived type explicit member mapping support to
handle descriptor member mapping at arbitrary levels of nesting. The PR
stack seems to do this reasonably (from testing so far) but as you can
create quite complex mappings with derived types (in particular when adding
allocatable derived types or arrays of allocatable derived types) I imagine
there will be hiccups, which I am more than happy to address. There will
also be further extensions to this work to handle the implicit auto-magical
mapping of descriptor members in derived types and a few other changes
planned for the future (with some ideas on optimizing things).

The changes in this PR primarily occur in the OpenMP lowering and
the OMPMapInfoFinalization pass.

In the OpenMP lowering several utility functions were added or extended
to support the generation of appropriate intermediate member mappings
which are currently required when the parent (or multiple parents) of a
mapped member are descriptor types. We need to map the entirety of
these types or do a "deep copy" for lack of a better term, where we map
both the base address and the descriptor: without copying both of these,
we lack the information in the descriptor needed to access the
member or attach the pointer's data to the pointer, and in the latter case we
require the base address to map the chunk of data. Currently we do not
segment descriptor based derived types as we do with regular
non-descriptor derived types, we effectively map their entirety in all
cases at the moment, I hope to address this at some point in the future as
it adds a fair bit of a performance penalty to having nestings of allocatable
derived types as an example. The process of mapping all intermediate
descriptor members in a member's path only occurs if a member has
an allocatable or object parent in its symbol path or the member itself
is a member or allocatable. This occurs in the
createParentSymAndGenIntermediateMaps function, which will also
generate the appropriate address for the allocatable member
within the derived type to use as the varPtr field of the map (for
intermediate allocatable maps and final allocatable mappings). In
this case it's necessary as we can't utilise the usual Fortran::lower
functionality such as gatherDataOperandAddrAndBounds without
causing issues later in the lowering due to extra allocas being spawned
which seem to affect the pointer attachment (at least this is my
current assumption, it results in memory access errors on the device
due to incorrect map information generation). This is similar
to why we do not use the MLIR value generated for this and utilise
the original symbol provided when mapping descriptor types external
to derived types. Hopefully this can be rectified in the future so this
function can be simplified and more closely aligned to the other type
mappings. We also make use of fir::CoordinateOp as opposed to the
HLFIR version as the HLFIR version doesn't support the appropriate
lowering to FIR necessary at the moment, we also cannot use a
single CoordinateOp (similarly to a single GEP) as when we index
through a descriptor operation (BoxType) we encounter issues later
in the lowering, however in either case we need access to intermediate
descriptors so individual CoordinateOp's aid this (although, being
able to compress them into a smaller amount of CoordinateOp's may
simplify the IR and perhaps result in a better end product, something
to consider for the future).

The other large change area was in the OMPMapInfoFinalization pass,
where the pass had to be extended to support the expansion of box
types (or multiple nestings of box types) within derived types, or box
type derived types. This requires expanding each BoxType mapping
from one into two maps and then modifying all of the existing
member indices of the overarching parent mapping to account for
the addition of these new members alongside adjusting the existing
member indices to support the addition of these new maps which
extend the original member indices (as a base address of a box type
is currently considered a member of the box type at a position of
0 as when lowered to LLVM-IR it's a pointer contained at this position
in the descriptor type, however, this means extending mapped children
of this expanded descriptor type to additionally incorporate the new
member index in the correct location in its own index list). I believe
there is a reasonable amount of comments that should aid in
understanding this better, alongside the test alterations for the pass.

A subset of the changes were also aimed at making some of the utilities
for packing and unpacking the DenseIntElementsAttr
containing the member indices shareable across the lowering and
OMPMapInfoFinalization, this required moving some functions to the

[llvm-branch-commits] [Flang][OpenMP] Derived type explicit allocatable member mapping (PR #96266)

2024-06-20 Thread via llvm-branch-commits

agozillon wrote:

This is the top-level PR of a 3 PR stack; the other PRs can be found here:
https://github.com/llvm/llvm-project/pull/96264
https://github.com/llvm/llvm-project/pull/96265
They're mostly tests, with some minor changes to the OpenMPToLLVMIRTranslation
lowering; this PR has the main set of source code changes.

https://github.com/llvm/llvm-project/pull/96266
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] [llvm] release/18.x: [lld] Fix -ObjC load behavior with LTO (#92162) (PR #92478)

2024-06-20 Thread via llvm-branch-commits

https://github.com/AtariDreams closed 
https://github.com/llvm/llvm-project/pull/92478
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT] Name similarity function matching (PR #95884)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95884


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT] Name similarity function matching (PR #95884)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95884


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95884

From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Thu, 20 Jun 2024 23:42:00 -0700
Subject: [PATCH] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 73 --
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 66cabc236f4b2..c9f6d88f0b13a 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses name similarity to match functions that were not matched by name.
   uint64_t MatchedWithDemangledName = 0;
-  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
-
-std::unordered_map NameToBinaryFunction;
-NameToBinaryFunction.reserve(BC.getBinaryFunctions().size());
 
-for (auto &[_, BF] : BC.getBinaryFunctions()) {
+  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
+auto DemangleName = [&](const char* String) {
   int Status = 0;
-  char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(),
+  char *DemangledName = abi::__cxa_demangle(String,
 nullptr, nullptr, &Status);
-  if (Status == 0)
-NameToBinaryFunction[std::string(DemangledName)] = &BF;
+  return Status == 0 ? new std::string(DemangledName) : nullptr;
+};
+
+auto DeriveNameSpace = [&](std::string DemangledName) {
+  size_t LParen = std::string(DemangledName).find("(");
+  std::string FunctionName = std::string(DemangledName).substr(0, LParen);
+  size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::");
+  return ScopeResolutionOperator == std::string::npos ? std::string("") : 
std::string(DemangledName).substr(0, ScopeResolutionOperator);
+};
+
+std::unordered_map> 
NamespaceToBFs;
+NamespaceToBFs.reserve(BC.getBinaryFunctions().size());
+
+for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
+  std::string* DemangledName = 
DemangleName(BF->getOneName().str().c_str());
+  if (!DemangledName)
+continue;
+  std::string Namespace = DeriveNameSpace(*DemangledName);
+  auto It = NamespaceToBFs.find(Namespace);
+  if (It == NamespaceToBFs.end())
+NamespaceToBFs[Namespace] = {BF};
+  else
+It->second.push_back(BF);
 }
 
 for (auto YamlBF : YamlBP.Functions) {
   if (YamlBF.Used)
 continue;
-  int Status = 0;
-  char *DemangledName =
-  abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status);
-  if (Status != 0)
+  std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str());
+  if (!YamlBFDemangledName)
 continue;
-  auto It = NameToBinaryFunction.find(DemangledName);
-  if (It == NameToBinaryFunction.end())
+  std::string Namespace = DeriveNameSpace(*YamlBFDemangledName);
+  auto It = NamespaceToBFs.find(Namespace);
+  if (It == NamespaceToBFs.end())
 continue;
-  BinaryFunction *BF = It->second;
-  matchProfileToFunction(YamlBF, *BF);
-  ++MatchedWithDemangledName;
+  std::vector BFs = It->second;
+
+  unsigned MinEditDistance = UINT_MAX;
+  BinaryFunction *ClosestNameBF = nullptr;
+
+  for (BinaryFunction *BF : BFs) {
+if (ProfiledFunctions.count(BF))
+  continue;
+std::string *BFDemangledName = 
DemangleName(BF->getOneName().str().c_str());
+if (!BFDemangledName)
+  continue;
+unsigned BFEditDistance = 
StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName);
+if (BFEditDistance < MinEditDistance) {
+  MinEditDistance = BFEditDistance;
+  ClosestNameBF = BF;
+}
+  }
+
+  if (ClosestNameBF &&
+MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+matchProfileToFunction(YamlBF, *ClosestNameBF);
+++MatchedWithDemangledName;
+  }
 }
   }
 
+  outs() << MatchedWithDemangledName  << ": functions matched by name 
similarity\n";
+
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
 if (!YamlBF.Used && opts::Verbosity >= 1)
   errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)

2024-06-20 Thread shaw young via llvm-branch-commits

https://github.com/shawbyoung updated 
https://github.com/llvm/llvm-project/pull/95884

From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Thu, 20 Jun 2024 23:42:00 -0700
Subject: [PATCH 1/2] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 73 --
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index 66cabc236f4b2..c9f6d88f0b13a 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses name similarity to match functions that were not matched by name.
   uint64_t MatchedWithDemangledName = 0;
-  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
-
-std::unordered_map NameToBinaryFunction;
-NameToBinaryFunction.reserve(BC.getBinaryFunctions().size());
 
-for (auto &[_, BF] : BC.getBinaryFunctions()) {
+  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
+auto DemangleName = [&](const char* String) {
   int Status = 0;
-  char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(),
+  char *DemangledName = abi::__cxa_demangle(String,
 nullptr, nullptr, &Status);
-  if (Status == 0)
-NameToBinaryFunction[std::string(DemangledName)] = &BF;
+  return Status == 0 ? new std::string(DemangledName) : nullptr;
+};
+
+auto DeriveNameSpace = [&](std::string DemangledName) {
+  size_t LParen = std::string(DemangledName).find("(");
+  std::string FunctionName = std::string(DemangledName).substr(0, LParen);
+  size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::");
+  return ScopeResolutionOperator == std::string::npos ? std::string("") : 
std::string(DemangledName).substr(0, ScopeResolutionOperator);
+};
+
+std::unordered_map> 
NamespaceToBFs;
+NamespaceToBFs.reserve(BC.getBinaryFunctions().size());
+
+for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
+  std::string* DemangledName = 
DemangleName(BF->getOneName().str().c_str());
+  if (!DemangledName)
+continue;
+  std::string Namespace = DeriveNameSpace(*DemangledName);
+  auto It = NamespaceToBFs.find(Namespace);
+  if (It == NamespaceToBFs.end())
+NamespaceToBFs[Namespace] = {BF};
+  else
+It->second.push_back(BF);
 }
 
 for (auto YamlBF : YamlBP.Functions) {
   if (YamlBF.Used)
 continue;
-  int Status = 0;
-  char *DemangledName =
-  abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status);
-  if (Status != 0)
+  std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str());
+  if (!YamlBFDemangledName)
 continue;
-  auto It = NameToBinaryFunction.find(DemangledName);
-  if (It == NameToBinaryFunction.end())
+  std::string Namespace = DeriveNameSpace(*YamlBFDemangledName);
+  auto It = NamespaceToBFs.find(Namespace);
+  if (It == NamespaceToBFs.end())
 continue;
-  BinaryFunction *BF = It->second;
-  matchProfileToFunction(YamlBF, *BF);
-  ++MatchedWithDemangledName;
+  std::vector BFs = It->second;
+
+  unsigned MinEditDistance = UINT_MAX;
+  BinaryFunction *ClosestNameBF = nullptr;
+
+  for (BinaryFunction *BF : BFs) {
+if (ProfiledFunctions.count(BF))
+  continue;
+std::string *BFDemangledName = 
DemangleName(BF->getOneName().str().c_str());
+if (!BFDemangledName)
+  continue;
+unsigned BFEditDistance = 
StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName);
+if (BFEditDistance < MinEditDistance) {
+  MinEditDistance = BFEditDistance;
+  ClosestNameBF = BF;
+}
+  }
+
+  if (ClosestNameBF &&
+MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+matchProfileToFunction(YamlBF, *ClosestNameBF);
+++MatchedWithDemangledName;
+  }
 }
   }
 
+  outs() << MatchedWithDemangledName  << ": functions matched by name 
similarity\n";
+
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
 if (!YamlBF.Used && opts::Verbosity >= 1)
   errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name

From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001
From: shawbyoung 
Date: Thu, 20 Jun 2024 23:45:27 -0700
Subject: [PATCH 2/2] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp 
b/bolt/lib/Profile/YAMLProfileReader.cpp
index c9f6d88f0b13a..cf4a5393df8f4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -491,8 +491,6 @@ Error YAMLProfileReader::read

[llvm-branch-commits] [Hashing] Use a non-deterministic seed (PR #96282)

2024-06-20 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay created 
https://github.com/llvm/llvm-project/pull/96282

Hashing.h provides hash_value/hash_combine/hash_combine_range, which are
primarily used by

* `DenseMap`
* `FoldingSetNodeIDRef::ComputeHash` (will be fixed by #96136)

Users shouldn't rely on specific hash values due to potential algorithm
changes. `set_fixed_execution_hash_seed` is provided but it has never
been used.

Take the address of a static storage duration variable as the seed,
like absl/hash/internal/hash.h `kSeed`.
(See https://reviews.llvm.org/D93931 for workaround for older Clang.
Mach-O x86-64 forces PIC, so absl's `__apple_build_version__` check is
unnecessary.)

A few users relying on the iteration order of `DenseMap`
have been fixed (e.g., f8f4235612b9 c025bd1fdbbd 89e8e63f47ff
86eb6bf6715c eb8d03656549 0ea6b8e476c2 58d7a6e0e636 8ea31db27211
255986e27fcf).
From my experience fixing `DenseMap` and
[`StringMap`](https://discourse.llvm.org/t/reverse-iteration-bots/72224)
iteration order issues, the scale of breakage is smaller.
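
As a minimal standalone illustration of the seeding pattern (the function
name below is made up and this is not the Hashing.h implementation), the
address of a function-local static typically differs between runs when ASLR
is enabled, so hash values seeded from it cannot be relied upon:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical helper: derive a per-process seed from the address of a
// static. The value has no guaranteed relation across runs or builds.
static uint64_t get_process_seed() {
  static const char seed_anchor = 0;
  return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&seed_anchor));
}

int main() {
  std::printf("seed: 0x%llx\n",
              static_cast<unsigned long long>(get_process_seed()));
  return 0;
}
```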



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [Hashing] Use a non-deterministic seed (PR #96282)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-adt

Author: Fangrui Song (MaskRay)


Changes

Hashing.h provides hash_value/hash_combine/hash_combine_range, which are
primarily used by

* `DenseMap`
* `FoldingSetNodeIDRef::ComputeHash` (will be fixed by #96136)

Users shouldn't rely on specific hash values due to potential algorithm
changes. `set_fixed_execution_hash_seed` is provided but it has never
been used.

Take the address of a static storage duration variable as the seed,
like absl/hash/internal/hash.h `kSeed`.
(See https://reviews.llvm.org/D93931 for workaround for older Clang.
Mach-O x86-64 forces PIC, so absl's `__apple_build_version__` check is
unnecessary.)

A few users relying on the iteration order of `DenseMap`
have been fixed (e.g., f8f4235612b9 c025bd1fdbbd 89e8e63f47ff
86eb6bf6715c eb8d03656549 0ea6b8e476c2 58d7a6e0e636 8ea31db27211
255986e27fcf).
From my experience fixing `DenseMap` and
[`StringMap`](https://discourse.llvm.org/t/reverse-iteration-bots/72224)
iteration order issues, the scale of breakage is smaller.


---
Full diff: https://github.com/llvm/llvm-project/pull/96282.diff


5 Files Affected:

- (modified) llvm/include/llvm/ADT/Hashing.h (+11-34) 
- (modified) llvm/lib/Support/CMakeLists.txt (-1) 
- (removed) llvm/lib/Support/Hashing.cpp (-28) 
- (modified) llvm/unittests/ADT/HashingTest.cpp (-72) 
- (modified) llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn (-1) 


``diff
diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h
index a5477362a5079..3c87894635787 100644
--- a/llvm/include/llvm/ADT/Hashing.h
+++ b/llvm/include/llvm/ADT/Hashing.h
@@ -126,23 +126,6 @@ hash_code hash_value(const std::basic_string &arg);
 /// Compute a hash_code for a standard string.
 template  hash_code hash_value(const std::optional &arg);
 
-/// Override the execution seed with a fixed value.
-///
-/// This hashing library uses a per-execution seed designed to change on each
-/// run with high probability in order to ensure that the hash codes are not
-/// attackable and to ensure that output which is intended to be stable does
-/// not rely on the particulars of the hash codes produced.
-///
-/// That said, there are use cases where it is important to be able to
-/// reproduce *exactly* a specific behavior. To that end, we provide a function
-/// which will forcibly set the seed to a fixed value. This must be done at the
-/// start of the program, before any hashes are computed. Also, it cannot be
-/// undone. This makes it thread-hostile and very hard to use outside of
-/// immediately on start of a simple program designed for reproducible
-/// behavior.
-void set_fixed_execution_hash_seed(uint64_t fixed_value);
-
-
 // All of the implementation details of actually computing the various hash
 // code values are held within this namespace. These routines are included in
 // the header file mainly to allow inlining and constant propagation.
@@ -322,24 +305,18 @@ struct hash_state {
   }
 };
 
-
-/// A global, fixed seed-override variable.
-///
-/// This variable can be set using the \see llvm::set_fixed_execution_seed
-/// function. See that function for details. Do not, under any circumstances,
-/// set or read this variable.
-extern uint64_t fixed_seed_override;
-
+/// The seed is non-deterministic (address of a variable) to prevent having
+/// users depend on the particular hash values. On platforms without ASLR, this
+/// is still likely non-deterministic per build.
 inline uint64_t get_execution_seed() {
-  // FIXME: This needs to be a per-execution seed. This is just a placeholder
-  // implementation. Switching to a per-execution seed is likely to flush out
-  // instability bugs and so will happen as its own commit.
-  //
-  // However, if there is a fixed seed override set the first time this is
-  // called, return that instead of the per-execution seed.
-  const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
-  static uint64_t seed = fixed_seed_override ? fixed_seed_override : 
seed_prime;
-  return seed;
+  static const char seed = 0;
+  // Work around x86-64 negative offset folding for old Clang -fno-pic
+  // https://reviews.llvm.org/D93931
+#if !defined(__clang__) || __clang_major__ > 11
+  return static_cast(reinterpret_cast(&seed));
+#else
+  return 0xff51afd7ed558ccdULL;
+#endif
 }
 
 
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 0c69ac99f5bc6..31343df4d8b8b 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -187,7 +187,6 @@ add_llvm_component_library(LLVMSupport
   FormatVariadic.cpp
   GlobPattern.cpp
   GraphWriter.cpp
-  Hashing.cpp
   HexagonAttributeParser.cpp
   HexagonAttributes.cpp
   InitLLVM.cpp
diff --git a/llvm/lib/Support/Hashing.cpp b/llvm/lib/Support/Hashing.cpp
deleted file mode 100644
index 1b20a670434f1..0
--- a/llvm/lib/Support/Hashing.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//===-- lib/Support/

[llvm-branch-commits] [Hashing] Use a non-deterministic seed (PR #96282)

2024-06-20 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-support

Author: Fangrui Song (MaskRay)


Changes

Hashing.h provides hash_value/hash_combine/hash_combine_range, which are
primarily used by

* `DenseMap`
* `FoldingSetNodeIDRef::ComputeHash` (will be fixed by #96136)

Users shouldn't rely on specific hash values due to potential algorithm
changes. `set_fixed_execution_hash_seed` is provided but it has never
been used.

Take the address of a static storage duration variable as the seed,
like absl/hash/internal/hash.h `kSeed`.
(See https://reviews.llvm.org/D93931 for workaround for older Clang.
Mach-O x86-64 forces PIC, so absl's `__apple_build_version__` check is
unnecessary.)

A few users relying on the iteration order of `DenseMap`
have been fixed (e.g., f8f4235612b9 c025bd1fdbbd 89e8e63f47ff
86eb6bf6715c eb8d03656549 0ea6b8e476c2 58d7a6e0e636 8ea31db27211
255986e27fcf).
From my experience fixing `DenseMap` and
[`StringMap`](https://discourse.llvm.org/t/reverse-iteration-bots/72224)
iteration order issues, the scale of breakage is smaller.


---
Full diff: https://github.com/llvm/llvm-project/pull/96282.diff


5 Files Affected:

- (modified) llvm/include/llvm/ADT/Hashing.h (+11-34) 
- (modified) llvm/lib/Support/CMakeLists.txt (-1) 
- (removed) llvm/lib/Support/Hashing.cpp (-28) 
- (modified) llvm/unittests/ADT/HashingTest.cpp (-72) 
- (modified) llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn (-1) 


``diff
diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h
index a5477362a5079..3c87894635787 100644
--- a/llvm/include/llvm/ADT/Hashing.h
+++ b/llvm/include/llvm/ADT/Hashing.h
@@ -126,23 +126,6 @@ hash_code hash_value(const std::basic_string &arg);
 /// Compute a hash_code for a standard string.
 template  hash_code hash_value(const std::optional &arg);
 
-/// Override the execution seed with a fixed value.
-///
-/// This hashing library uses a per-execution seed designed to change on each
-/// run with high probability in order to ensure that the hash codes are not
-/// attackable and to ensure that output which is intended to be stable does
-/// not rely on the particulars of the hash codes produced.
-///
-/// That said, there are use cases where it is important to be able to
-/// reproduce *exactly* a specific behavior. To that end, we provide a function
-/// which will forcibly set the seed to a fixed value. This must be done at the
-/// start of the program, before any hashes are computed. Also, it cannot be
-/// undone. This makes it thread-hostile and very hard to use outside of
-/// immediately on start of a simple program designed for reproducible
-/// behavior.
-void set_fixed_execution_hash_seed(uint64_t fixed_value);
-
-
 // All of the implementation details of actually computing the various hash
 // code values are held within this namespace. These routines are included in
 // the header file mainly to allow inlining and constant propagation.
@@ -322,24 +305,18 @@ struct hash_state {
   }
 };
 
-
-/// A global, fixed seed-override variable.
-///
-/// This variable can be set using the \see llvm::set_fixed_execution_seed
-/// function. See that function for details. Do not, under any circumstances,
-/// set or read this variable.
-extern uint64_t fixed_seed_override;
-
+/// The seed is non-deterministic (address of a variable) to prevent having
+/// users depend on the particular hash values. On platforms without ASLR, this
+/// is still likely non-deterministic per build.
 inline uint64_t get_execution_seed() {
-  // FIXME: This needs to be a per-execution seed. This is just a placeholder
-  // implementation. Switching to a per-execution seed is likely to flush out
-  // instability bugs and so will happen as its own commit.
-  //
-  // However, if there is a fixed seed override set the first time this is
-  // called, return that instead of the per-execution seed.
-  const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
-  static uint64_t seed = fixed_seed_override ? fixed_seed_override : 
seed_prime;
-  return seed;
+  static const char seed = 0;
+  // Work around x86-64 negative offset folding for old Clang -fno-pic
+  // https://reviews.llvm.org/D93931
+#if !defined(__clang__) || __clang_major__ > 11
+  return static_cast(reinterpret_cast(&seed));
+#else
+  return 0xff51afd7ed558ccdULL;
+#endif
 }
 
 
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 0c69ac99f5bc6..31343df4d8b8b 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -187,7 +187,6 @@ add_llvm_component_library(LLVMSupport
   FormatVariadic.cpp
   GlobPattern.cpp
   GraphWriter.cpp
-  Hashing.cpp
   HexagonAttributeParser.cpp
   HexagonAttributes.cpp
   InitLLVM.cpp
diff --git a/llvm/lib/Support/Hashing.cpp b/llvm/lib/Support/Hashing.cpp
deleted file mode 100644
index 1b20a670434f1..0
--- a/llvm/lib/Support/Hashing.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//===-- lib/Supp