https://github.com/wenju-he updated 
https://github.com/llvm/llvm-project/pull/157055

>From 84fbdfea1fc1f9d7d61ef388df4d34eb2d0552d0 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Fri, 5 Sep 2025 10:41:01 +0200
Subject: [PATCH 1/3] [libclc] Implement erf/erfc vector function with loop
 since scalar function is large

This PR reduces amdgcn--amdhsa.bc size by 3% and nvptx64--nvidiacl.bc
size by 4%.
Loop trip count is constant and backend can decide whether to unroll.
---
 .../clc/shared/unary_def_scalarize_loop.inc   | 26 +++++++++++++++++++
 libclc/clc/lib/generic/math/clc_erf.cl        |  2 +-
 libclc/clc/lib/generic/math/clc_erfc.cl       |  2 +-
 3 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc

diff --git a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
new file mode 100644
index 0000000000000..89cc52ae795e5
--- /dev/null
+++ b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_VECSIZE_OR_1 >= 2
+
+#include <clc/utils.h>
+
+#ifndef __CLC_IMPL_FUNCTION
+#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x) {
+  __CLC_GENTYPE result;
+  __CLC_SCALAR_GENTYPE *a = (__CLC_SCALAR_GENTYPE *)&x;
+  __CLC_SCALAR_GENTYPE *b = (__CLC_SCALAR_GENTYPE *)&result;
+  for (int i = 0; i < __CLC_VECSIZE_OR_1; ++i)
+    b[i] = __CLC_IMPL_FUNCTION(a[i]);
+  return result;
+}
+
+#endif // __CLC_VECSIZE_OR_1 >= 2
diff --git a/libclc/clc/lib/generic/math/clc_erf.cl b/libclc/clc/lib/generic/math/clc_erf.cl
index 34c7d586131e2..61a7c9d684aab 100644
--- a/libclc/clc/lib/generic/math/clc_erf.cl
+++ b/libclc/clc/lib/generic/math/clc_erf.cl
@@ -507,5 +507,5 @@ _CLC_OVERLOAD _CLC_DEF half __clc_erf(half x) {
 #endif
 
 #define __CLC_FUNCTION __clc_erf
-#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#define __CLC_BODY <clc/shared/unary_def_scalarize_loop.inc>
 #include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_erfc.cl b/libclc/clc/lib/generic/math/clc_erfc.cl
index 7922807818ea2..01dbcd0c39ae1 100644
--- a/libclc/clc/lib/generic/math/clc_erfc.cl
+++ b/libclc/clc/lib/generic/math/clc_erfc.cl
@@ -518,5 +518,5 @@ _CLC_OVERLOAD _CLC_DEF half __clc_erfc(half x) {
 #endif
 
 #define __CLC_FUNCTION __clc_erfc
-#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#define __CLC_BODY <clc/shared/unary_def_scalarize_loop.inc>
 #include <clc/math/gentype.inc>

>From bb7d77e31fd48116c51f8d30b5ad625893d0ddfb Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Fri, 5 Sep 2025 16:54:16 +0800
Subject: [PATCH 2/3] Update
 libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc

Co-authored-by: Copilot <175728472+copi...@users.noreply.github.com>
---
 .../include/clc/shared/unary_def_scalarize_loop.inc  | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
index 89cc52ae795e5..8d947c326d168 100644
--- a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
+++ b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
@@ -15,12 +15,14 @@
 #endif
 
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x) {
-  __CLC_GENTYPE result;
-  __CLC_SCALAR_GENTYPE *a = (__CLC_SCALAR_GENTYPE *)&x;
-  __CLC_SCALAR_GENTYPE *b = (__CLC_SCALAR_GENTYPE *)&result;
+  union {
+    __CLC_GENTYPE vec;
+    __CLC_SCALAR_GENTYPE arr[__CLC_VECSIZE_OR_1];
+  } u_x, u_result;
+  u_x.vec = x;
   for (int i = 0; i < __CLC_VECSIZE_OR_1; ++i)
-    b[i] = __CLC_IMPL_FUNCTION(a[i]);
-  return result;
+    u_result.arr[i] = __CLC_IMPL_FUNCTION(u_x.arr[i]);
+  return u_result.vec;
 }
 
 #endif // __CLC_VECSIZE_OR_1 >= 2

>From 9e16da66e30e13029a425b5b5d45bea41581d5ec Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Fri, 5 Sep 2025 11:05:27 +0200
Subject: [PATCH 3/3] move #if below #include

---
 libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
index 8d947c326d168..544057b0e1378 100644
--- a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
+++ b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
@@ -6,10 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if __CLC_VECSIZE_OR_1 >= 2
-
 #include <clc/utils.h>
 
+#if __CLC_VECSIZE_OR_1 >= 2
+
 #ifndef __CLC_IMPL_FUNCTION
 #define __CLC_IMPL_FUNCTION __CLC_FUNCTION
 #endif

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to