https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/157055

>From 84fbdfea1fc1f9d7d61ef388df4d34eb2d0552d0 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Fri, 5 Sep 2025 10:41:01 +0200
Subject: [PATCH 1/3] [libclc] Implement erf/erfc vector function with loop since scalar function is large

This PR reduces amdgcn--amdhsa.bc size by 3% and nvptx64--nvidiacl.bc size by 4%.
Loop trip count is constant and backend can decide whether to unroll.
---
 .../clc/shared/unary_def_scalarize_loop.inc | 26 +++++++++++++++++++
 libclc/clc/lib/generic/math/clc_erf.cl      |  2 +-
 libclc/clc/lib/generic/math/clc_erfc.cl     |  2 +-
 3 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc

diff --git a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
new file mode 100644
index 0000000000000..89cc52ae795e5
--- /dev/null
+++ b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_VECSIZE_OR_1 >= 2
+
+#include <clc/utils.h>
+
+#ifndef __CLC_IMPL_FUNCTION
+#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
+#endif
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x) {
+  __CLC_GENTYPE result;
+  __CLC_SCALAR_GENTYPE *a = (__CLC_SCALAR_GENTYPE *)&x;
+  __CLC_SCALAR_GENTYPE *b = (__CLC_SCALAR_GENTYPE *)&result;
+  for (int i = 0; i < __CLC_VECSIZE_OR_1; ++i)
+    b[i] = __CLC_IMPL_FUNCTION(a[i]);
+  return result;
+}
+
+#endif // __CLC_VECSIZE_OR_1 >= 2
diff --git a/libclc/clc/lib/generic/math/clc_erf.cl b/libclc/clc/lib/generic/math/clc_erf.cl
index 34c7d586131e2..61a7c9d684aab 100644
--- a/libclc/clc/lib/generic/math/clc_erf.cl
+++ b/libclc/clc/lib/generic/math/clc_erf.cl
@@ -507,5 +507,5 @@ _CLC_OVERLOAD _CLC_DEF half __clc_erf(half x) {
 #endif
 
 #define __CLC_FUNCTION __clc_erf
-#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#define __CLC_BODY <clc/shared/unary_def_scalarize_loop.inc>
 #include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_erfc.cl b/libclc/clc/lib/generic/math/clc_erfc.cl
index 7922807818ea2..01dbcd0c39ae1 100644
--- a/libclc/clc/lib/generic/math/clc_erfc.cl
+++ b/libclc/clc/lib/generic/math/clc_erfc.cl
@@ -518,5 +518,5 @@ _CLC_OVERLOAD _CLC_DEF half __clc_erfc(half x) {
 #endif
 
 #define __CLC_FUNCTION __clc_erfc
-#define __CLC_BODY <clc/shared/unary_def_scalarize.inc>
+#define __CLC_BODY <clc/shared/unary_def_scalarize_loop.inc>
 #include <clc/math/gentype.inc>

>From bb7d77e31fd48116c51f8d30b5ad625893d0ddfb Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Fri, 5 Sep 2025 16:54:16 +0800
Subject: [PATCH 2/3] Update libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc

Co-authored-by: Copilot <175728472+copi...@users.noreply.github.com>
---
 .../include/clc/shared/unary_def_scalarize_loop.inc | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
index 89cc52ae795e5..8d947c326d168 100644
--- a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
+++ b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
@@ -15,12 +15,14 @@
 #endif
 
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE x) {
-  __CLC_GENTYPE result;
-  __CLC_SCALAR_GENTYPE *a = (__CLC_SCALAR_GENTYPE *)&x;
-  __CLC_SCALAR_GENTYPE *b = (__CLC_SCALAR_GENTYPE *)&result;
+  union {
+    __CLC_GENTYPE vec;
+    __CLC_SCALAR_GENTYPE arr[__CLC_VECSIZE_OR_1];
+  } u_x, u_result;
+  u_x.vec = x;
   for (int i = 0; i < __CLC_VECSIZE_OR_1; ++i)
-    b[i] = __CLC_IMPL_FUNCTION(a[i]);
-  return result;
+    u_result.arr[i] = __CLC_IMPL_FUNCTION(u_x.arr[i]);
+  return u_result.vec;
 }
 
 #endif // __CLC_VECSIZE_OR_1 >= 2

>From 9e16da66e30e13029a425b5b5d45bea41581d5ec Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Fri, 5 Sep 2025 11:05:27 +0200
Subject: [PATCH 3/3] move #if below #include

---
 libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
index 8d947c326d168..544057b0e1378 100644
--- a/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
+++ b/libclc/clc/include/clc/shared/unary_def_scalarize_loop.inc
@@ -6,10 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if __CLC_VECSIZE_OR_1 >= 2
-
 #include <clc/utils.h>
 
+#if __CLC_VECSIZE_OR_1 >= 2
+
 #ifndef __CLC_IMPL_FUNCTION
 #define __CLC_IMPL_FUNCTION __CLC_FUNCTION
 #endif

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits